Diffstat (limited to 'drivers')
832 files changed, 357396 insertions, 24774 deletions
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index d2a5f1184022..2408ea38a39c 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -121,7 +121,6 @@ config QCOM_EBI2
 config SIMPLE_PM_BUS
 	bool "Simple Power-Managed Bus Driver"
 	depends on OF && PM
-	depends on ARCH_RENESAS || COMPILE_TEST
 	help
 	  Driver for transparent busses that don't need a real driver, but
 	  where the bus controller is part of a PM domain, or under the control
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 512bdbc23bbb..4a038dcf5361 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -558,8 +558,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	if (WARN_ON(!dmabuf || !dev))
 		return ERR_PTR(-EINVAL);
 
-	attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL);
-	if (attach == NULL)
+	attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+	if (!attach)
 		return ERR_PTR(-ENOMEM);
 
 	attach->dev = dev;
@@ -1122,9 +1122,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		attach_count = 0;
 
 		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
-			seq_puts(s, "\t");
-
-			seq_printf(s, "%s\n", dev_name(attach_obj->dev));
+			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0918d3f003d6..57da14c15987 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -402,6 +402,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 		}
 	}
 
+	if (!timeout) {
+		ret = 0;
+		goto out;
+	}
+
 	cb.base.func = dma_fence_default_wait_cb;
 	cb.task = current;
 	list_add(&cb.base.node, &fence->cb_list);
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index c769dc653b34..82a6e7f6d37f 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -110,7 +110,7 @@ static void sync_print_fence(struct seq_file *s,
 		}
 	}
 
-	seq_puts(s, "\n");
+	seq_putc(s, '\n');
 }
 
 static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
@@ -132,9 +132,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
 static void sync_print_sync_file(struct seq_file *s,
 				 struct sync_file *sync_file)
 {
+	char buf[128];
 	int i;
 
-	seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
+	seq_printf(s, "[%p] %s: %s\n", sync_file,
+		   sync_file_get_name(sync_file, buf, sizeof(buf)),
 		   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
 	if (dma_fence_is_array(sync_file->fence)) {
@@ -161,7 +163,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
 						     sync_timeline_list);
 
 		sync_print_obj(s, obj);
-		seq_puts(s, "\n");
+		seq_putc(s, '\n');
 	}
 	spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
 
@@ -173,7 +175,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
 			container_of(pos, struct sync_file, sync_file_list);
 
 		sync_print_sync_file(s, sync_file);
-		seq_puts(s, "\n");
+		seq_putc(s, '\n');
 	}
 	spin_unlock_irqrestore(&sync_file_list_lock, flags);
 	return 0;
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035f6204..545e2c5c4815 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -41,8 +41,6 @@ static struct sync_file *sync_file_alloc(void)
 	if (IS_ERR(sync_file->file))
 		goto err;
 
-	kref_init(&sync_file->kref);
-
 	init_waitqueue_head(&sync_file->wq);
 
 	INIT_LIST_HEAD(&sync_file->cb.node);
@@ -82,11 +80,6 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 
 	sync_file->fence = dma_fence_get(fence);
 
-	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
-		 fence->ops->get_driver_name(fence),
-		 fence->ops->get_timeline_name(fence), fence->context,
-		 fence->seqno);
-
 	return sync_file;
 }
 EXPORT_SYMBOL(sync_file_create);
@@ -131,6 +124,36 @@ struct dma_fence *sync_file_get_fence(int fd)
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+/**
+ * sync_file_get_name - get the name of the sync_file
+ * @sync_file:	sync_file to get the fence from
+ * @buf:	destination buffer to copy sync_file name into
+ * @len:	available size of destination buffer.
+ *
+ * Each sync_file may have a name assigned either by the user (when merging
+ * sync_files together) or created from the fence it contains. In the latter
+ * case construction of the name is deferred until use, and so requires
+ * sync_file_get_name().
+ *
+ * Returns: a string representing the name.
+ */
+char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
+{
+	if (sync_file->user_name[0]) {
+		strlcpy(buf, sync_file->user_name, len);
+	} else {
+		struct dma_fence *fence = sync_file->fence;
+
+		snprintf(buf, len, "%s-%s%llu-%d",
+			 fence->ops->get_driver_name(fence),
+			 fence->ops->get_timeline_name(fence),
+			 fence->context,
+			 fence->seqno);
+	}
+
+	return buf;
+}
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
@@ -268,7 +291,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		goto err;
 	}
 
-	strlcpy(sync_file->name, name, sizeof(sync_file->name));
+	strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
 	return sync_file;
 err:
@@ -277,22 +300,15 @@ err:
 
 }
 
-static void sync_file_free(struct kref *kref)
+static int sync_file_release(struct inode *inode, struct file *file)
 {
-	struct sync_file *sync_file = container_of(kref, struct sync_file,
-						     kref);
+	struct sync_file *sync_file = file->private_data;
 
 	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
 		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
-}
-
-static int sync_file_release(struct inode *inode, struct file *file)
-{
-	struct sync_file *sync_file = file->private_data;
 
-	kref_put(&sync_file->kref, sync_file_free);
 	return 0;
 }
@@ -422,7 +438,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	}
 
 no_fences:
-	strlcpy(info.name, sync_file->name, sizeof(info.name));
+	sync_file_get_name(sync_file, info.name, sizeof(info.name));
 	info.status = dma_fence_is_signaled(sync_file->fence);
 	info.num_fences = num_fences;
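Note: with the name no longer materialized at creation time, every consumer of a sync_file name must go through the new helper. A minimal usage sketch, modelled on the sync_debug.c hunk above (the 128-byte buffer is an illustrative choice, not an API requirement):

    char buf[128];

    /* For a merged sync_file this yields the user-supplied name; otherwise
     * the name is built on demand from the contained fence in the form
     * "<driver>-<timeline><context>-<seqno>". */
    seq_printf(s, "%s\n", sync_file_get_name(sync_file, buf, sizeof(buf)));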
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 78d7fc0ebb57..83cb2a88c204 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -246,6 +246,8 @@ source "drivers/gpu/drm/fsl-dcu/Kconfig"
 
 source "drivers/gpu/drm/tegra/Kconfig"
 
+source "drivers/gpu/drm/stm/Kconfig"
+
 source "drivers/gpu/drm/panel/Kconfig"
 
 source "drivers/gpu/drm/bridge/Kconfig"
@@ -274,6 +276,8 @@ source "drivers/gpu/drm/meson/Kconfig"
 
 source "drivers/gpu/drm/tinydrm/Kconfig"
 
+source "drivers/gpu/drm/pl111/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 59f0f9b696eb..24a066e1841c 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -16,7 +16,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
 		drm_framebuffer.o drm_connector.o drm_blend.o \
 		drm_encoder.o drm_mode_object.o drm_property.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
-		drm_dumb_buffers.o drm_mode_config.o
+		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
+		drm_syncobj.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o
@@ -34,6 +35,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
 		drm_simple_kms_helper.o drm_modeset_helper.o \
 		drm_scdc_helper.o
 
+drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
 drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
@@ -82,6 +84,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs/
 obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
 obj-$(CONFIG_DRM_MSM) += msm/
 obj-$(CONFIG_DRM_TEGRA) += tegra/
+obj-$(CONFIG_DRM_STM) += stm/
 obj-$(CONFIG_DRM_STI) += sti/
 obj-$(CONFIG_DRM_IMX) += imx/
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
@@ -96,3 +99,4 @@ obj-y += hisilicon/
 obj-$(CONFIG_DRM_ZTE) += zte/
 obj-$(CONFIG_DRM_MXSFB) += mxsfb/
 obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
+obj-$(CONFIG_DRM_PL111) += pl111/
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 61360e27715f..26682454a446 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -5,15 +5,23 @@ config DRM_AMDGPU_SI
 	  Choose this option if you want to enable experimental support
 	  for SI asics.
 
+	  SI is already supported in radeon. Experimental support for SI
+	  in amdgpu will be disabled by default and is still provided by
+	  radeon. Use module options to override this:
+
+	  radeon.si_support=0 amdgpu.si_support=1
+
 config DRM_AMDGPU_CIK
 	bool "Enable amdgpu support for CIK parts"
 	depends on DRM_AMDGPU
 	help
-	  Choose this option if you want to enable experimental support
-	  for CIK asics.
+	  Choose this option if you want to enable support for CIK asics.
+
+	  CIK is already supported in radeon. Support for CIK in amdgpu
+	  will be disabled by default and is still provided by radeon.
+	  Use module options to override this:
 
-	  CIK is already supported in radeon. CIK support in amdgpu
-	  is for experimentation and testing.
+	  radeon.cik_support=0 amdgpu.cik_support=1
 
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
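Note: the reworded help texts rely on runtime module options rather than build-time defaults. As a usage illustration (the file name under /etc/modprobe.d/ is arbitrary, not part of the patch), routing SI and CIK parts to amdgpu instead of radeon would look like:

    # /etc/modprobe.d/amdgpu-experimental.conf -- hypothetical file name
    options radeon si_support=0 cik_support=0
    options amdgpu si_support=1 cik_support=1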
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 660786aba7d2..faea6349228f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -4,7 +4,7 @@
 
 FULL_AMD_PATH=$(src)/..
 
-ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
+ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
@@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
+	amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
 
 # add GMC block
 amdgpu-y += \
@@ -54,7 +55,8 @@ amdgpu-y += \
 # add PSP block
 amdgpu-y += \
 	amdgpu_psp.o \
-	psp_v3_1.o
+	psp_v3_1.o \
+	psp_v10_0.o
 
 # add SMC block
 amdgpu-y += \
@@ -92,6 +94,11 @@ amdgpu-y += \
 	vce_v3_0.o \
 	vce_v4_0.o
 
+# add VCN block
+amdgpu-y += \
+	amdgpu_vcn.o \
+	vcn_v1_0.o
+
 # add amdkfd interfaces
 amdgpu-y += \
 	amdgpu_amdkfd.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 833c3c16501a..ff7bf1a9f967 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -36,16 +36,18 @@
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
-#include <ttm/ttm_bo_api.h>
-#include <ttm/ttm_bo_driver.h>
-#include <ttm/ttm_placement.h>
-#include <ttm/ttm_module.h>
-#include <ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_execbuf_util.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
@@ -62,6 +64,7 @@
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
+#include "amdgpu_vcn.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -92,6 +95,7 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_no_evict;
@@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se;
 extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
+extern int amdgpu_job_hang_limit;
+extern int amdgpu_lbpw;
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+extern int amdgpu_si_support;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern int amdgpu_cik_support;
+#endif
 
 #define AMDGPU_DEFAULT_GTT_SIZE_MB	3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	3000
@@ -305,8 +318,8 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-	/* adjust mc addr in fb for APU case */
-	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	/* get the pde for a given mc addr */
+	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages);
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		     int pages, struct page **pagelist,
@@ -602,6 +615,7 @@ struct amdgpu_mc {
 	uint32_t		srbm_soft_reset;
 	struct amdgpu_mode_mc_save save;
 	bool			prt_warning;
+	uint64_t		stolen_size;
 	/* apertures */
 	u64			shared_aperture_start;
 	u64			shared_aperture_end;
@@ -772,6 +786,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct dma_fence **f);
 
 /*
+ * Queue manager
+ */
+struct amdgpu_queue_mapper {
+	int		hw_ip;
+	struct mutex	lock;
+	/* protected by lock */
+	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring);
+
+/*
  * context related structures
  */
 
@@ -784,6 +821,7 @@ struct amdgpu_ctx_ring {
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device	*adev;
+	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
@@ -822,6 +860,7 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	u32			vram_lost_counter;
 };
 
 /*
@@ -830,6 +869,8 @@ struct amdgpu_fpriv {
 struct amdgpu_bo_list {
 	struct mutex lock;
+	struct rcu_head rhead;
+	struct kref refcount;
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
@@ -893,20 +934,26 @@ struct amdgpu_rlc {
 	u32 *register_restore;
 };
 
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
-	u32 num_pipe;
 	u32 num_mec;
-	u32 num_queue;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
+	struct mutex		ring_mutex;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
@@ -981,9 +1028,15 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
-	uint32_t number; /* total active CU number */
-	uint32_t ao_cu_mask;
+	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
+
+	/* total active CU number */
+	uint32_t number;
+	uint32_t ao_cu_mask;
+	uint32_t ao_cu_bitmap[4][4];
 	uint32_t bitmap[4][4];
 };
 
@@ -1061,6 +1114,8 @@ struct amdgpu_gfx {
 	uint32_t			grbm_soft_reset;
 	uint32_t			srbm_soft_reset;
 	bool				in_reset;
+	/* s3/s4 mask */
+	bool				in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
 };
@@ -1109,12 +1164,14 @@ struct amdgpu_cs_parser {
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
+
+	unsigned num_post_dep_syncobjs;
+	struct drm_syncobj **post_dep_syncobjs;
 };
 
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */
-#define AMDGPU_VM_DOMAIN                    (1 << 3) /* bit set means in virtual memory context */
 
 struct amdgpu_job {
 	struct amd_sched_job    base;
@@ -1122,6 +1179,8 @@ struct amdgpu_job {
 	struct amdgpu_vm	*vm;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
+	struct amdgpu_sync	dep_sync;
+	struct amdgpu_sync	sched_sync;
 	struct amdgpu_ib	*ibs;
 	struct dma_fence	*fence; /* the hw fence */
 	uint32_t		preamble_status;
@@ -1129,7 +1188,6 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool			vm_needs_flush;
-	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1221,6 +1279,9 @@ struct amdgpu_firmware {
 	const struct amdgpu_psp_funcs *funcs;
 	struct amdgpu_bo *rbuf;
 	struct mutex mutex;
+
+	/* gpu info firmware data pointer */
+	const struct firmware *gpu_info_fw;
 };
 
 /*
@@ -1296,7 +1357,6 @@ struct amdgpu_smumgr {
 */
 struct amdgpu_allowed_register_entry {
 	uint32_t reg_offset;
-	bool untouched;
 	bool grbm_indexed;
 };
 
@@ -1424,6 +1484,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 	struct device			*dev;
 	struct drm_device		*ddev;
@@ -1523,7 +1584,9 @@ struct amdgpu_device {
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_evictions;
+	atomic64_t			num_vram_cpu_page_faults;
 	atomic_t			gpu_reset_counter;
+	atomic_t			vram_lost_counter;
 
 	/* data for buffer migration throttling */
 	struct {
@@ -1570,11 +1633,18 @@ struct amdgpu_device {
 	/* sdma */
 	struct amdgpu_sdma		sdma;
 
-	/* uvd */
-	struct amdgpu_uvd		uvd;
+	union {
+		struct {
+			/* uvd */
+			struct amdgpu_uvd	uvd;
+
+			/* vce */
+			struct amdgpu_vce	vce;
+		};
 
-	/* vce */
-	struct amdgpu_vce		vce;
+		/* vcn */
+		struct amdgpu_vcn	vcn;
+	};
 
 	/* firmwares */
 	struct amdgpu_firmware		firmware;
@@ -1598,6 +1668,9 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev			*kfd;
 
+	/* delayed work_func for deferring clockgating during resume */
+	struct delayed_work		late_init_work;
+
 	struct amdgpu_virt	virt;
 
 	/* link all shadow bo */
@@ -1606,9 +1679,13 @@ struct amdgpu_device {
 	/* link all gtt */
 	spinlock_t			gtt_list_lock;
 	struct list_head		gtt_list;
+	/* keep an lru list of rings by HW IP */
+	struct list_head		ring_lru_list;
+	spinlock_t			ring_lru_list_lock;
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
+	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 };
 
@@ -1617,7 +1694,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
 
-bool amdgpu_device_is_px(struct drm_device *dev);
 int amdgpu_device_init(struct amdgpu_device *adev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
@@ -1733,30 +1809,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 	unsigned occupied, chunk1, chunk2;
 	void *dst;
 
-	if (ring->count_dw < count_dw) {
+	if (unlikely(ring->count_dw < count_dw)) {
 		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-	} else {
-		occupied = ring->wptr & ring->buf_mask;
-		dst = (void *)&ring->ring[occupied];
-		chunk1 = ring->buf_mask + 1 - occupied;
-		chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-		chunk2 = count_dw - chunk1;
-		chunk1 <<= 2;
-		chunk2 <<= 2;
-
-		if (chunk1)
-			memcpy(dst, src, chunk1);
-
-		if (chunk2) {
-			src += chunk1;
-			dst = (void *)ring->ring;
-			memcpy(dst, src, chunk2);
-		}
-
-		ring->wptr += count_dw;
-		ring->wptr &= ring->ptr_mask;
-		ring->count_dw -= count_dw;
+		return;
+	}
+
+	occupied = ring->wptr & ring->buf_mask;
+	dst = (void *)&ring->ring[occupied];
+	chunk1 = ring->buf_mask + 1 - occupied;
+	chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+	chunk2 = count_dw - chunk1;
+	chunk1 <<= 2;
+	chunk2 <<= 2;
+
+	if (chunk1)
+		memcpy(dst, src, chunk1);
+
+	if (chunk2) {
+		src += chunk1;
+		dst = (void *)ring->ring;
+		memcpy(dst, src, chunk2);
 	}
+
+	ring->wptr += count_dw;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw -= count_dw;
 }
 
 static inline struct amdgpu_sdma_instance *
@@ -1792,6 +1869,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -1813,6 +1891,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@@ -1848,10 +1927,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
@@ -1900,6 +1975,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
@@ -1912,10 +1989,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
-				    int *max_error,
-				    struct timeval *vblank_time,
-				    unsigned flags);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg);
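Note: the amdgpu_queue_mgr introduced above supersedes amdgpu_cs_get_ring() as the way a user-visible (hw_ip, instance, ring) triple is resolved to a hardware ring, with the result cached per context. A sketch of the lookup pattern the CS paths later in this diff use (error handling trimmed; the compute IP and ring index are illustrative):

    struct amdgpu_ring *ring;
    int r;

    /* Resolve a userspace ring index to an amdgpu_ring; the mapping is
     * cached in ctx->queue_mgr so later submissions reuse the same ring. */
    r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
                             AMDGPU_HW_IP_COMPUTE, 0 /* instance */,
                             2 /* user ring index */, &ring);
    if (r)
        return r;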
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b25e66..5f8ada1d872b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -24,6 +24,7 @@
 #include "amd_shared.h"
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_gfx.h"
 #include <linux/module.h>
 
 const struct kfd2kgd_calls *kfd2kgd;
@@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void)
 	return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-	switch (rdev->asic_type) {
+	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void)
 	}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-					   rdev->pdev, kfd2kgd);
+		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+					   adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
+	int i;
+	int last_valid_bit;
+	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = adev->gfx.mec.num_mec,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
-		amdgpu_doorbell_get_kfd_info(rdev,
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* remove the KIQ bit as well */
+		if (adev->gfx.kiq.ring.ready)
+			clear_bit(amdgpu_gfx_queue_to_bit(adev,
+							  adev->gfx.kiq.ring.me - 1,
+							  adev->gfx.kiq.ring.pipe,
+							  adev->gfx.kiq.ring.queue),
+				  gpu_resources.queue_bitmap);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+		 * nbits is not compile time constant */
+		last_valid_bit = adev->gfx.mec.num_mec
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
+		amdgpu_doorbell_get_kfd_info(adev,
 					     &gpu_resources.doorbell_physical_address,
 					     &gpu_resources.doorbell_aperture_size,
 					     &gpu_resources.doorbell_start_offset);
 
-		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
-		kgd2kfd->device_exit(rdev->kfd);
-		rdev->kfd = NULL;
+	if (adev->kfd) {
+		kgd2kfd->device_exit(adev->kfd);
+		adev->kfd = NULL;
 	}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (rdev->kfd)
-		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+	if (adev->kfd)
+		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-	if (rdev->kfd)
-		kgd2kfd->suspend(rdev->kfd);
+	if (adev->kfd)
+		kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (rdev->kfd)
-		r = kgd2kfd->resume(rdev->kfd);
+	if (adev->kfd)
+		r = kgd2kfd->resume(adev->kfd);
 
 	return r;
 }
@@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	int r;
 
@@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	if ((*mem) == NULL)
 		return -ENOMEM;
 
-	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
 		return r;
 	}
@@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	/* map the buffer */
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 				&(*mem)->gpu_addr);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
 	*gpu_addr = (*mem)->gpu_addr;
 
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
@@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev =
+	struct amdgpu_device *adev =
 		(struct amdgpu_device *)kgd;
 
 	BUG_ON(kgd == NULL);
 
-	return rdev->mc.real_vram_size;
+	return adev->mc.real_vram_size;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (rdev->gfx.funcs->get_gpu_clock_counter)
-		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+	if (adev->gfx.funcs->get_gpu_clock_counter)
+		return adev->gfx.funcs->get_gpu_clock_counter(adev);
 	return 0;
 }
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* The sclk is in quantas of 10kHz */
-	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 }
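Note: the device_init hunk above divides the compute-queue space between amdgpu and amdkfd through queue_bitmap. The helper it calls, amdgpu_gfx_queue_to_bit(), is not shown in this section; the sketch below shows the linearization it is assumed to perform (queue varies fastest, then pipe, then MEC — an assumption to verify against the amdgpu_gfx code):

    static int queue_to_bit(struct amdgpu_device *adev,
                            int mec, int pipe, int queue)
    {
        /* assumed bit layout:
         * bit = (mec * pipes_per_mec + pipe) * queues_per_pipe + queue */
        return (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
               * adev->gfx.mec.num_queue_per_pipe + queue;
    }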
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index de530f68d4e3..73f83a10ae14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -39,15 +39,15 @@ struct kgd_mem {
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7cccbc..5254562fd0f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -29,6 +29,7 @@
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
@@ -38,8 +39,6 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
-#define CIK_PIPE_PER_MEC	(4)
-
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_VMID, 0);
-	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
@@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
@@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-	acquire_queue(kgd, pipe_id, queue_id);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
 	if (is_wptr_shadow_valid)
-		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+		m->cp_hqd_pq_wptr = wptr_shadow;
 
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v7_0_mqd_commit(adev, m);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612239c2..133d06671e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -28,6 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -38,8 +39,6 @@
 #include "vi_structs.h"
 #include "vid.h"
 
-#define VI_PIPE_PER_MEC	(4)
-
 struct cik_sdma_rlc_registers;
 
 /*
@@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
@@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % VI_PIPE_PER_MEC);
+	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
@@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	acquire_queue(kgd, pipe_id, queue_id);
-
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	if (valid_wptr > 0)
-		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
-
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	if (valid_wptr == 0)
+		m->cp_hqd_pq_wptr = shadow_wptr;
 
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index a6649874e6ce..f621ee115c98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -35,33 +35,59 @@
 #define AMDGPU_BO_LIST_MAX_PRIORITY	32u
 #define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1)
 
-static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
-				 struct amdgpu_bo_list **result,
+static int amdgpu_bo_list_set(struct amdgpu_device *adev,
+				     struct drm_file *filp,
+				     struct amdgpu_bo_list *list,
+				     struct drm_amdgpu_bo_list_entry *info,
+				     unsigned num_entries);
+
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+	unsigned i;
+	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+						   refcount);
+
+	for (i = 0; i < list->num_entries; ++i)
+		amdgpu_bo_unref(&list->array[i].robj);
+
+	mutex_destroy(&list->lock);
+	kvfree(list->array);
+	kfree_rcu(list, rhead);
+}
+
+static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 struct drm_amdgpu_bo_list_entry *info,
+				 unsigned num_entries,
 				 int *id)
 {
 	int r;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_bo_list *list;
 
-	*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
-	if (!*result)
+	list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
+	if (!list)
 		return -ENOMEM;
 
+	/* initialize bo list*/
+	mutex_init(&list->lock);
+	kref_init(&list->refcount);
+	r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
+	if (r) {
+		kfree(list);
+		return r;
+	}
+
+	/* idr alloc should be called only after initialization of bo list. */
 	mutex_lock(&fpriv->bo_list_lock);
-	r = idr_alloc(&fpriv->bo_list_handles, *result,
-		      1, 0, GFP_KERNEL);
+	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+	mutex_unlock(&fpriv->bo_list_lock);
 	if (r < 0) {
-		mutex_unlock(&fpriv->bo_list_lock);
-		kfree(*result);
+		kfree(list);
 		return r;
 	}
+
 	*id = r;
 
-	mutex_init(&(*result)->lock);
-	(*result)->num_entries = 0;
-	(*result)->array = NULL;
-
-	mutex_lock(&(*result)->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
-
 	return 0;
 }
 
@@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 
 	mutex_lock(&fpriv->bo_list_lock);
 	list = idr_remove(&fpriv->bo_list_handles, id);
-	if (list) {
-		/* Another user may have a reference to this list still */
-		mutex_lock(&list->lock);
-		mutex_unlock(&list->lock);
-		amdgpu_bo_list_free(list);
-	}
 	mutex_unlock(&fpriv->bo_list_lock);
+	if (list)
+		kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	int r;
 	unsigned long total_size = 0;
 
-	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
+	array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!array)
 		return -ENOMEM;
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
@@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	for (i = 0; i < list->num_entries; ++i)
 		amdgpu_bo_unref(&list->array[i].robj);
 
-	drm_free_large(list->array);
+	kvfree(list->array);
 
 	list->gds_obj = gds_obj;
 	list->gws_obj = gws_obj;
@@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 error_free:
 	while (i--)
 		amdgpu_bo_unref(&array[i].robj);
-	drm_free_large(array);
+	kvfree(array);
 	return r;
 }
 
@@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 {
 	struct amdgpu_bo_list *result;
 
-	mutex_lock(&fpriv->bo_list_lock);
+	rcu_read_lock();
 	result = idr_find(&fpriv->bo_list_handles, id);
-	if (result)
-		mutex_lock(&result->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
+
+	if (result) {
+		if (kref_get_unless_zero(&result->refcount))
+			mutex_lock(&result->lock);
+		else
+			result = NULL;
+	}
+	rcu_read_unlock();
+
 	return result;
 }
 
@@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
 	mutex_unlock(&list->lock);
+	kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@@ -224,7 +253,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
 		amdgpu_bo_unref(&list->array[i].robj);
 
 	mutex_destroy(&list->lock);
-	drm_free_large(list->array);
+	kvfree(list->array);
 	kfree(list);
 }
 
@@ -244,8 +273,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
 	int r;
 
-	info = drm_malloc_ab(args->in.bo_number,
-			     sizeof(struct drm_amdgpu_bo_list_entry));
+	info = kvmalloc_array(args->in.bo_number,
+			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 
@@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->in.operation) {
 	case AMDGPU_BO_LIST_OP_CREATE:
-		r = amdgpu_bo_list_create(fpriv, &list, &handle);
+		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+					  &handle);
 		if (r)
 			goto error_free;
-
-		r = amdgpu_bo_list_set(adev, filp, list, info,
-					      args->in.bo_number);
-		amdgpu_bo_list_put(list);
-		if (r)
-			goto error_free;
-
 		break;
 
 	case AMDGPU_BO_LIST_OP_DESTROY:
@@ -311,11 +334,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 	memset(args, 0, sizeof(*args));
 	args->out.list_handle = handle;
-	drm_free_large(info);
+	kvfree(info);
 
 	return 0;
 
 error_free:
-	drm_free_large(info);
+	kvfree(info);
 	return r;
 }
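Note: the bo_list rework above replaces the lock-inside-IDR lifetime scheme with RCU plus a kref. The lookup side is the standard kref_get_unless_zero-under-RCU idiom: idr_find() can race with destruction, so a reference is taken only if the count has not already reached zero, and the object is freed via kfree_rcu() so a concurrent reader never touches reclaimed memory. In generic form (names illustrative, not from the patch):

    rcu_read_lock();
    obj = idr_find(&handles, id);
    /* The object may be concurrently released; use it only if we can
     * still take a reference, i.e. the refcount is not yet zero. */
    if (obj && !kref_get_unless_zero(&obj->refcount))
        obj = NULL;
    rcu_read_unlock();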
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index c6dba1eaefbd..c0a806280257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -838,6 +838,12 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 		return -EINVAL;
 
 	mode_info = info->mode_info;
+	if (mode_info) {
+		/* if the displays are off, vblank time is max */
+		mode_info->vblank_time_us = 0xffffffff;
+		/* always set the reference clock */
+		mode_info->ref_clock = adev->clock.spll.reference_freq;
+	}
 
 	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4e6b9501ab0a..5599c01b265d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,81 +27,10 @@
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring)
-{
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (ip_instance != 0) {
-		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
-		return -EINVAL;
-	}
-
-	switch (ip_type) {
-	default:
-		DRM_ERROR("unknown ip type: %d\n", ip_type);
-		return -EINVAL;
-	case AMDGPU_HW_IP_GFX:
-		if (ring < adev->gfx.num_gfx_rings) {
-			*out_ring = &adev->gfx.gfx_ring[ring];
-		} else {
-			DRM_ERROR("only %d gfx rings are supported now\n",
-				  adev->gfx.num_gfx_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		if (ring < adev->gfx.num_compute_rings) {
-			*out_ring = &adev->gfx.compute_ring[ring];
-		} else {
-			DRM_ERROR("only %d compute rings are supported now\n",
-				  adev->gfx.num_compute_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_DMA:
-		if (ring < adev->sdma.num_instances) {
-			*out_ring = &adev->sdma.instance[ring].ring;
-		} else {
-			DRM_ERROR("only %d SDMA rings are supported\n",
-				  adev->sdma.num_instances);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		if (ring < adev->vce.num_rings){
-			*out_ring = &adev->vce.ring[ring];
-		} else {
-			DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		if (ring < adev->uvd.num_enc_rings){
-			*out_ring = &adev->uvd.ring_enc[ring];
-		} else {
-			DRM_ERROR("only %d UVD ENC rings are supported\n",
-				  adev->uvd.num_enc_rings);
-			return -EINVAL;
-		}
-		break;
-	}
-
-	if (!(*out_ring && (*out_ring)->adev)) {
-		DRM_ERROR("Ring %d is not initialized on IP %d\n",
-			  ring, ip_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
@@ -135,7 +64,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -194,7 +123,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
 
-		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
+		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
 			ret = -ENOMEM;
 			i--;
@@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			break;
 
 		default:
@@ -247,7 +178,7 @@ free_all_kdata:
 	i = p->nchunks - 1;
 free_partial_kdata:
 	for (; i >= 0; i--)
-		drm_free_large(p->chunks[i].kdata);
+		kvfree(p->chunks[i].kdata);
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->nchunks = 0;
@@ -505,7 +436,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			return r;
 
 		if (binding_userptr) {
-			drm_free_large(lobj->user_pages);
+			kvfree(lobj->user_pages);
 			lobj->user_pages = NULL;
 		}
 	}
@@ -566,12 +497,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 					 &e->user_invalidated) && e->user_pages) {
 
 			/* We acquired a page array, but somebody
-			 * invalidated it. Free it an try again
+			 * invalidated it. Free it and try again
 			 */
 			release_pages(e->user_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      false);
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 			e->user_pages = NULL;
 		}
 
@@ -597,12 +528,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 		}
 
-		/* Fill the page arrays for all useptrs. */
+		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
 			struct ttm_tt *ttm = e->robj->tbo.ttm;
 
-			e->user_pages = drm_calloc_large(ttm->num_pages,
-							 sizeof(struct page*));
+			e->user_pages = kvmalloc_array(ttm->num_pages,
+						       sizeof(struct page*),
+						       GFP_KERNEL | __GFP_ZERO);
 			if (!e->user_pages) {
 				r = -ENOMEM;
 				DRM_ERROR("calloc failure in %s\n", __func__);
@@ -612,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
 			if (r) {
 				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				drm_free_large(e->user_pages);
+				kvfree(e->user_pages);
 				e->user_pages = NULL;
 				goto error_free_pages;
 			}
@@ -708,7 +640,7 @@ error_free_pages:
 			release_pages(e->user_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      false);
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 		}
 	}
 
@@ -753,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
+
+	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+		drm_syncobj_put(parser->post_dep_syncobjs[i]);
+	kfree(parser->post_dep_syncobjs);
+
 	dma_fence_put(parser->fence);
 
 	if (parser->ctx)
@@ -761,7 +698,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	amdgpu_bo_list_put(parser->bo_list);
 
 	for (i = 0; i < parser->nchunks; i++)
-		drm_free_large(parser->chunks[i].kdata);
+		kvfree(parser->chunks[i].kdata);
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
@@ -916,9 +853,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			return -EINVAL;
 		}
 
-		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
-				       chunk_ib->ip_instance, chunk_ib->ring,
-				       &ring);
+		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
+					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
 		if (r)
 			return r;
 
@@ -995,62 +931,148 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
+					 deps[i].ip_type,
+					 deps[i].ip_instance,
+					 deps[i].ring, &ring);
+		if (r) {
+			amdgpu_ctx_put(ctx);
+			return r;
+		}
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(p->adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+						 uint32_t handle)
+{
+	int r;
+	struct dma_fence *fence;
+	r = drm_syncobj_fence_get(p->filp, handle, &fence);
+	if (r)
+		return r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+	dma_fence_put(fence);
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+	return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+					    struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+					     struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	p->post_dep_syncobjs = kmalloc_array(num_deps,
+					     sizeof(struct drm_syncobj *),
+					     GFP_KERNEL);
+	p->num_post_dep_syncobjs = 0;
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_dep_syncobjs[i])
+			return -EINVAL;
+		p->num_post_dep_syncobjs++;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_cs_process_fence_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i;
+
+	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
+		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
@@ -1071,6 +1093,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+	amdgpu_cs_post_dependencies(p);
+
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
@@ -1078,13 +1103,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
-
 	return 0;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1092,6 +1117,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	if (!adev->accel_working)
 		return -EBUSY;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 
 	parser.adev = adev;
 	parser.filp = filp;
@@ -1153,21 +1180,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r)
-		return r;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+				 wait->in.ip_type, wait->in.ip_instance,
+				 wait->in.ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return r;
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
@@ -1203,15 +1237,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	struct dma_fence *fence;
 	int r;
 
-	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
-			       user->ring, &ring);
-	if (r)
-		return ERR_PTR(r);
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
+				 user->ip_instance, user->ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return ERR_PTR(r);
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
@@ -1332,12 +1368,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
 	struct drm_amdgpu_fence *fences;
 	int r;
 
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 	/* Get the fences from userspace */
 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
 			GFP_KERNEL);
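Note: the two new chunk IDs handled above let userspace pass DRM sync objects into and out of a CS submission. A sketch of the userspace side, assuming the usual drm_amdgpu_cs_chunk UAPI wrapping (the handles and the two-chunk layout are illustrative):

    struct drm_amdgpu_cs_chunk_sem wait_sem = { .handle = in_syncobj };
    struct drm_amdgpu_cs_chunk_sem signal_sem = { .handle = out_syncobj };
    struct drm_amdgpu_cs_chunk chunks[] = {
        {
            .chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN,   /* wait before running */
            .length_dw = sizeof(wait_sem) / 4,
            .chunk_data = (uintptr_t)&wait_sem,
        }, {
            .chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT,  /* signal on completion */
            .length_dw = sizeof(signal_sem) / 4,
            .chunk_data = (uintptr_t)&signal_sem,
        },
    };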
parser = {}; bool reserved_buffers = false; @@ -1092,6 +1117,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; parser.adev = adev; parser.filp = filp; @@ -1153,21 +1180,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) - return r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, + wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); if (IS_ERR(fence)) r = PTR_ERR(fence); @@ -1203,15 +1237,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, struct dma_fence *fence; int r; - r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance, - user->ring, &ring); - if (r) - return ERR_PTR(r); - ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); if (ctx == NULL) return ERR_PTR(-EINVAL); + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, + user->ip_instance, user->ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return ERR_PTR(r); + } + fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); amdgpu_ctx_put(ctx); @@ -1332,12 +1368,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 90d1ac8a80f8..a11e44340b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amd_sched_rq *rq; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + + if (ring == &adev->gfx.kiq.ring) + continue; + r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } + r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr); + if (r) + goto failed; + return 0; failed: @@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 0; i < adev->num_rings; i++) amd_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); + + amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bbac5d5d1fcf..4a8fc15467cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -54,8 +54,14 @@ #include 
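The amdgpu_kms_vram_lost() gating that starts appearing in the ioctls here compares a counter snapshotted at file-open time against a device-wide counter bumped on every reset that lost VRAM. A minimal userspace model of that pattern; the type and function names are illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct device_state { atomic_uint vram_lost_counter; }; /* bumped on lossy resets */
struct file_priv { unsigned vram_lost_snapshot; };      /* taken at open() */

static void open_file(struct device_state *dev, struct file_priv *fpriv)
{
	fpriv->vram_lost_snapshot = atomic_load(&dev->vram_lost_counter);
}

/* A mismatch means this file's GPU state predates a lossy reset: bail with -ENODEV. */
static bool vram_lost(struct device_state *dev, struct file_priv *fpriv)
{
	return fpriv->vram_lost_snapshot != atomic_load(&dev->vram_lost_counter);
}

int main(void)
{
	struct device_state dev = { .vram_lost_counter = 0 };
	struct file_priv fpriv;

	open_file(&dev, &fpriv);
	printf("before reset: lost=%d\n", vram_lost(&dev, &fpriv));
	atomic_fetch_add(&dev.vram_lost_counter, 1); /* a reset wiped VRAM */
	printf("after reset:  lost=%d\n", vram_lost(&dev, &fpriv));
	return 0;
}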
<linux/pci.h> #include <linux/firmware.h> +MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); + +#define AMDGPU_RESUME_MS 2000 + static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); static const char *amdgpu_asic_name[] = { "TAHITI", @@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS11", "POLARIS12", "VEGA10", + "RAVEN", "LAST", }; @@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, /* * amdgpu_wb_*() - * Writeback is the the method by which the the GPU updates special pages - * in memory with the status of certain GPU events (fences, ring pointers, - * etc.). + * Writeback is the method by which the GPU updates special pages in memory + * with the status of certain GPU events (fences, ring pointers,etc.). */ /** @@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * Disables Writeback and frees the Writeback memory (all asics). + * Initializes writeback and allocates writeback memory (all asics). * Used at driver startup. * Returns 0 on success or an -error on failure. */ @@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM * - * Function will place try to place VRAM at base address provided + * Function will try to place VRAM at base address provided * as parameter (which is so far either PCI aperture address or * for IGP TOM base address). * @@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) * ones) * * Note: IGP TOM addr should be the same as the aperture addr, we don't - * explicitly check for that thought. + * explicitly check for that though. * * FIXME: when reducing VRAM size align new size on power of 2. 
*/ @@ -1067,6 +1073,10 @@ def_value: static void amdgpu_check_vm_size(struct amdgpu_device *adev) { + /* no need to check the default value */ + if (amdgpu_vm_size == -1) + return; + if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", amdgpu_vm_size); @@ -1338,6 +1348,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; + DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, + ip_block_version->funcs->name); + adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; return 0; @@ -1388,6 +1401,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } +static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + const struct gpu_info_firmware_header_v1_0 *hdr; + + adev->firmware.gpu_info_fw = NULL; + + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + case CHIP_POLARIS12: + case CHIP_CARRIZO: + case CHIP_STONEY: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_VERDE: + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_OLAND: + case CHIP_HAINAN: +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#endif + default: + return 0; + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_RAVEN: + chip_name = "raven"; + break; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); + err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); + if (err) { + dev_err(adev->dev, + "Failed to load gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); + if (err) { + dev_err(adev->dev, + "Failed to validate gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + + hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; + amdgpu_ucode_print_gpu_info_hdr(&hdr->header); + + switch (hdr->version_major) { + case 1: + { + const struct gpu_info_firmware_v1_0 *gpu_info_fw = + (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = + le32_to_cpu(gpu_info_fw->gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = + le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = + le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); + break; + } + default: + 
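amdgpu_device_parse_gpu_info_fw() above requests the per-ASIC gpu_info image, validates it, and only then dereferences header fields, converting each from little-endian. A self-contained sketch of that validate-then-parse shape; the header layout and field names here are assumptions for illustration, not the real gpu_info_firmware_header_v1_0:

#include <endian.h> /* le32toh(); Linux/glibc */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fake_gpu_info_hdr {
	uint32_t size_bytes;
	uint32_t version_major;
	uint32_t version_minor;
	uint32_t ucode_array_offset_bytes;
};

static int parse_gpu_info(const uint8_t *blob, size_t len)
{
	struct fake_gpu_info_hdr hdr;

	if (len < sizeof(hdr))
		return -1; /* truncated image: reject before reading fields */
	memcpy(&hdr, blob, sizeof(hdr));
	if (le32toh(hdr.version_major) != 1)
		return -1; /* unsupported table, mirroring the parser's default case */
	printf("payload at +%u bytes\n", le32toh(hdr.ucode_array_offset_bytes));
	return 0;
}

int main(void)
{
	/* little-endian on disk: size=16, major=1, minor=0, offset=16 */
	uint8_t blob[16] = { 16, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0 };
	return parse_gpu_info(blob, sizeof(blob));
}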
dev_err(adev->dev, + "Unsupported gpu_info table %d\n", hdr->header.ucode_version); + err = -EINVAL; + goto out; + } +out: + return err; +} + static int amdgpu_early_init(struct amdgpu_device *adev) { int i, r; @@ -1440,8 +1551,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return r; break; #endif - case CHIP_VEGA10: - adev->family = AMDGPU_FAMILY_AI; + case CHIP_VEGA10: + case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) + adev->family = AMDGPU_FAMILY_RV; + else + adev->family = AMDGPU_FAMILY_AI; r = soc15_set_ip_blocks(adev); if (r) @@ -1452,6 +1567,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } + r = amdgpu_device_parse_gpu_info_fw(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) @@ -1460,7 +1579,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_ERROR("disabled ip block: %d\n", i); + DRM_ERROR("disabled ip block: %d <%s>\n", + i, adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else { if (adev->ip_blocks[i].version->funcs->early_init) { @@ -1548,22 +1668,24 @@ static int amdgpu_init(struct amdgpu_device *adev) return 0; } -static int amdgpu_late_init(struct amdgpu_device *adev) +static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) +{ + memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); +} + +static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) +{ + return !!memcmp(adev->gart.ptr, adev->reset_magic, + AMDGPU_RESET_MAGIC_NUM); +} + +static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->funcs->late_init) { - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); - if (r) { - DRM_ERROR("late_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.late_initialized = true; - } /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1577,6 +1699,31 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } } + return 0; +} + +static int amdgpu_late_init(struct amdgpu_device *adev) +{ + int i = 0, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); + if (r) { + DRM_ERROR("late_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.late_initialized = true; + } + } + + mod_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); + + amdgpu_fill_reset_magic(adev); return 0; } @@ -1668,6 +1815,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) return 0; } +static void amdgpu_late_init_func_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, late_init_work.work); + amdgpu_late_set_cg_state(adev); +} + int amdgpu_suspend(struct amdgpu_device *adev) { int i, r; @@ -1713,19 +1867,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) { int i, r; - for (i = 0; i < adev->num_ip_blocks; i++) { - if 
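The reset-magic helpers added above (amdgpu_fill_reset_magic()/amdgpu_check_vram_lost()) reduce VRAM-loss detection to remembering the first bytes of a GART page that lives in VRAM and memcmp'ing them after a reset. A userspace model; the 64-byte length mirrors AMDGPU_RESET_MAGIC_NUM but is an assumed value here:

#include <stdio.h>
#include <string.h>

#define RESET_MAGIC_NUM 64

static unsigned char reset_magic[RESET_MAGIC_NUM];

static void fill_reset_magic(const void *gart_ptr)
{
	memcpy(reset_magic, gart_ptr, RESET_MAGIC_NUM); /* snapshot at init/resume */
}

static int check_vram_lost(const void *gart_ptr)
{
	return !!memcmp(gart_ptr, reset_magic, RESET_MAGIC_NUM);
}

int main(void)
{
	unsigned char vram[RESET_MAGIC_NUM] = "fence page contents";

	fill_reset_magic(vram);
	printf("intact: lost=%d\n", check_vram_lost(vram));
	memset(vram, 0, sizeof(vram)); /* a full reset cleared VRAM */
	printf("wiped:  lost=%d\n", check_vram_lost(vram));
	return 0;
}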
(!adev->ip_blocks[i].status.valid) - continue; + static enum amd_ip_block_type ip_order[] = { + AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_IH, + }; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); } } @@ -1736,33 +1896,67 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) { int i, r; + static enum amd_ip_block_type ip_order[] = { + AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_DCE, + AMD_IP_BLOCK_TYPE_GFX, + AMD_IP_BLOCK_TYPE_SDMA, + AMD_IP_BLOCK_TYPE_VCE, + }; + + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; + + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); } } + + return 0; +} + +static int amdgpu_resume_phase1(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) - continue; - - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } } } return 0; } -static int amdgpu_resume(struct amdgpu_device *adev) +static int amdgpu_resume_phase2(struct amdgpu_device *adev) { int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", @@ -1774,6 +1968,18 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static int amdgpu_resume(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_resume_phase1(adev); + if (r) + return r; + r = amdgpu_resume_phase2(adev); + + return r; +} + static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { if (adev->is_atom_fw) { @@ -1856,8 +2062,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_check_arguments(adev); - /* Registers mapping */ - /* TODO: block userspace mapping of io register */ spin_lock_init(&adev->mmio_idx_lock);
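The SR-IOV re-init paths above replace "walk the IP blocks in discovery order" with "walk a fixed priority table and bring up every matching block", so that, e.g., GMC is guaranteed to come up before the interrupt handler regardless of probe order. A compact model of that double loop; the block names and counts are invented for the demo:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum block_type { T_GMC, T_COMMON, T_IH, T_GFX };

struct ip_block { enum block_type type; int valid; const char *name; };

static void reinit_in_order(const enum block_type *order, int order_len,
			    const struct ip_block *blocks, int nblocks)
{
	for (int i = 0; i < order_len; i++)
		for (int j = 0; j < nblocks; j++) {
			if (blocks[j].type != order[i] || !blocks[j].valid)
				continue;
			printf("RE-INIT: %s\n", blocks[j].name); /* hw_init() would go here */
		}
}

int main(void)
{
	static const enum block_type early[] = { T_GMC, T_COMMON, T_IH };
	const struct ip_block blocks[] = {
		{ T_IH, 1, "ih" }, { T_COMMON, 1, "common" },
		{ T_GMC, 1, "gmc" }, { T_GFX, 1, "gfx" }, /* not in the table: skipped */
	};

	reinit_in_order(early, ARRAY_SIZE(early), blocks, ARRAY_SIZE(blocks));
	return 0;
}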
spin_lock_init(&adev->smc_idx_lock); spin_lock_init(&adev->pcie_idx_lock); @@ -1873,6 +2077,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->gtt_list); spin_lock_init(&adev->gtt_list_lock); + INIT_LIST_HEAD(&adev->ring_lru_list); + spin_lock_init(&adev->ring_lru_list_lock); + + INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); + + /* Registers mapping */ + /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { adev->rmmio_base = pci_resource_start(adev->pdev, 5); adev->rmmio_size = pci_resource_len(adev->pdev, 5); @@ -1985,6 +2196,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->accel_working = true; + amdgpu_vm_check_compute_bug(adev); + /* Initialize the buffer migration limit. */ if (amdgpu_moverate >= 0) max_MBps = amdgpu_moverate; @@ -2013,6 +2226,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); + r = amdgpu_debugfs_test_ib_ring_init(adev); + if (r) + DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); + r = amdgpu_debugfs_firmware_init(adev); if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); @@ -2069,7 +2286,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); + if (adev->firmware.gpu_info_fw) { + release_firmware(adev->firmware.gpu_info_fw); + adev->firmware.gpu_info_fw = NULL; + } adev->accel_working = false; + cancel_delayed_work_sync(&adev->late_init_work); /* free i2c buses */ amdgpu_i2c_fini(adev); amdgpu_atombios_fini(adev); @@ -2454,16 +2676,15 @@ err: * amdgpu_sriov_gpu_reset - reset the asic * * @adev: amdgpu device pointer - * @voluntary: if this reset is requested by guest. - * (true means by guest and false means by HYPERVISOR ) + * @job: which job trigger hang * * Attempt the reset the GPU if it has hung (all asics). * for SRIOV case. * Returns 0 for success or an error on failure. */ -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) { - int i, r = 0; + int i, j, r = 0; int resched; struct amdgpu_bo *bo, *tmp; struct amdgpu_ring *ring; @@ -2476,22 +2697,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* block scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i]; + /* we start from the ring trigger GPU hang */ + j = job ? 
job->ring->idx : 0; + /* block scheduler */ + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); + + if (job && j != i) + continue; + + /* one last chance to check whether the job was already removed from the mirror list, + * since we have already spent time on kthread_park */ + if (job && list_empty(&job->base.node)) { + kthread_unpark(ring->sched.thread); + goto give_up_reset; + } + + if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit)) + amd_sched_job_kickout(&job->base); + + /* only do job_reset on the hang ring if @job not NULL */ amd_sched_hw_job_reset(&ring->sched); - } - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion(adev); + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion_ring(ring); + } /* request to take full control of GPU before re-initialization */ - if (voluntary) + if (job) amdgpu_virt_reset_gpu(adev); else amdgpu_virt_request_full_gpu(adev, true); @@ -2541,20 +2779,28 @@ } dma_fence_put(fence); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; + if (job && j != i) { + kthread_unpark(ring->sched.thread); + continue; + } + amd_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } drm_helper_resume_force_mode(adev->ddev); +give_up_reset: ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ dev_info(adev->dev, "GPU reset failed\n"); + } else { + dev_info(adev->dev, "GPU reset succeeded!\n"); } adev->gfx.in_reset = false; @@ -2574,10 +2820,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) { int i, r; int resched; - bool need_full_reset; - - if (amdgpu_sriov_vf(adev)) - return amdgpu_sriov_gpu_reset(adev, true); + bool need_full_reset, vram_lost = false; if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); @@ -2637,16 +2880,27 @@ retry: if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume(adev); + r = amdgpu_resume_phase1(adev); + if (r) + goto out; + vram_lost = amdgpu_check_vram_lost(adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&adev->vram_lost_counter); + } + r = amdgpu_ttm_recover_gart(adev); + if (r) + goto out; + r = amdgpu_resume_phase2(adev); + if (r) + goto out; + if (vram_lost) + amdgpu_fill_reset_magic(adev); } } +out: if (!r) { amdgpu_irq_gpu_reset_resume_helper(adev); - if (need_full_reset && amdgpu_need_backup(adev)) { - r = amdgpu_ttm_recover_gart(adev); - if (r) - DRM_ERROR("gart recovery failed!!!\n"); - } r = amdgpu_ib_ring_tests(adev); if (r) { dev_err(adev->dev, "ib ring test failed (%d).\n", r); @@ -2708,10 +2962,11 @@ retry: drm_helper_resume_force_mode(adev->ddev); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (r) { + if (r) /* bad news, how to tell it to userspace ? 
*/ dev_info(adev->dev, "GPU reset failed\n"); - } + else + dev_info(adev->dev, "GPU reset succeeded!\n"); return r; } @@ -3495,11 +3750,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } } +static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + int r = 0, i; + + /* hold on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_park(ring->sched.thread); + } + + seq_printf(m, "run ib test:\n"); + r = amdgpu_ib_ring_tests(adev); + if (r) + seq_printf(m, "ib ring tests failed (%d).\n", r); + else + seq_printf(m, "ib ring tests passed.\n"); + + /* go on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_unpark(ring->sched.thread); + } + + return 0; +} + +static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { + {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} +}; + +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return amdgpu_debugfs_add_files(adev, + amdgpu_debugfs_test_ib_ring_list, 1); +} + int amdgpu_debugfs_init(struct drm_minor *minor) { return 0; } #else +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return 0; +} static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 38e9b0d3659a..1cb52fd19060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ab6b0d0febab..b59f37c83fa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -39,7 +39,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> -#include "drm_crtc_helper.h" +#include <drm/drm_crtc_helper.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -65,9 +65,12 @@ * - 3.13.0 - Add PRT support * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality * - 3.15.0 - Export more gpu info for gfx9 + * - 3.16.0 - Add reserved vmid support + * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. 
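Pulling the reworked recovery sequence in amdgpu_gpu_reset() out of the diff noise: resume only the early blocks, decide whether VRAM survived before touching anything that lives there, rebuild the GART, then resume the rest. A stubbed sketch of that control flow; every function here is a stand-in:

#include <stdio.h>

static int resume_phase1(void)   { return 0; } /* COMMON, GMC, IH */
static int check_vram_lost(void) { return 1; } /* pretend VRAM was wiped */
static int recover_gart(void)    { return 0; }
static int resume_phase2(void)   { return 0; } /* all remaining blocks */
static void fill_reset_magic(void) { }

static int gpu_recover(void)
{
	int vram_lost, r;

	r = resume_phase1();
	if (r)
		return r;
	vram_lost = check_vram_lost();
	if (vram_lost)
		fprintf(stderr, "VRAM is lost!\n"); /* the driver bumps vram_lost_counter here */
	r = recover_gart();
	if (r)
		return r;
	r = resume_phase2();
	if (r)
		return r;
	if (vram_lost)
		fill_reset_magic(); /* re-arm detection for the next reset */
	return 0;
}

int main(void)
{
	return gpu_recover();
}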
+ * - 3.18.0 - Export gpu always on cu bitmap */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 15 +#define KMS_DRIVER_MINOR 18 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -92,7 +95,8 @@ int amdgpu_vm_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; -int amdgpu_vram_page_split = 1024; +int amdgpu_vram_page_split = 512; +int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; @@ -110,6 +114,8 @@ int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; int amdgpu_cntl_sb_buf_per_se = 0; int amdgpu_param_buf_per_se = 0; +int amdgpu_job_hang_limit = 0; +int amdgpu_lbpw = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -177,6 +183,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); +MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)"); +module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); + MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); @@ -232,6 +241,38 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +MODULE_PARM_DESC(job_hang_limit, "how long to allow a job to hang before dropping it (default 0)"); +module_param_named(job_hang_limit, amdgpu_job_hang_limit, int, 0444); + +MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(lbpw, amdgpu_lbpw, int, 0444); + +#ifdef CONFIG_DRM_AMDGPU_SI + +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +int amdgpu_si_support = 0; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); +#else +int amdgpu_si_support = 1; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); +#endif + +module_param_named(si_support, amdgpu_si_support, int, 0444); +#endif + +#ifdef CONFIG_DRM_AMDGPU_CIK + +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +int amdgpu_cik_support = 0; +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); +#else +int amdgpu_cik_support = 1; +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); +#endif + +module_param_named(cik_support, amdgpu_cik_support, int, 0444); +#endif + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI @@ -461,6 +502,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + /* Raven */ + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0, 0, 0} }; @@ -492,6 +536,7 @@ static int amdgpu_kick_out_firmware_fb(struct 
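The si_support/cik_support parameters above pick their default at compile time so that, when both radeon and amdgpu are built, only one of them claims SI/CIK hardware unless the user overrides it. The same pattern, reduced to a runnable example; the CONFIG_* macros stand in for Kconfig output:

#include <stdio.h>

#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
static const int si_support_default = 0; /* radeon is built: it keeps SI */
#else
static const int si_support_default = 1; /* amdgpu is the only SI driver built */
#endif

int main(void)
{
	printf("si_support defaults to %d\n", si_support_default);
	return 0;
}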
pci_dev *pdev) static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + struct drm_device *dev; unsigned long flags = ent->driver_data; int ret; @@ -514,7 +559,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; + + pci_set_drvdata(pdev, dev); + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_pci; + + return 0; + +err_pci: + pci_disable_device(pdev); +err_free: + drm_dev_unref(dev); + return ret; } static void @@ -522,7 +589,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - drm_put_dev(dev); + drm_dev_unregister(dev); + drm_dev_unref(dev); } static void @@ -716,11 +784,21 @@ static const struct file_operations amdgpu_driver_kms_fops = { #endif }; +static bool +amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); +} + static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, @@ -730,8 +808,8 @@ static struct drm_driver kms_driver = { .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms, - .get_scanout_position = amdgpu_get_crtc_scanoutpos, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, + .get_scanout_position = amdgpu_get_crtc_scanout_position, #if defined(CONFIG_DEBUG_FS) .debugfs_init = amdgpu_debugfs_init, #endif @@ -808,7 +886,7 @@ static int __init amdgpu_init(void) driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); /* let modprobe override vga console setting */ - return drm_pci_init(driver, pdriver); + return pci_register_driver(pdriver); error_sched: amdgpu_fence_slab_fini(); @@ -823,7 +901,7 @@ error_sync: static void __exit amdgpu_exit(void) { amdgpu_amdkfd_fini(); - drm_pci_exit(driver, pdriver); + pci_unregister_driver(pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amd_sched_fence_slab_fini(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7b60fb79c3a6..333bad749067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) } } +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) +{ + if (ring) + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); +} + /* * Common fence implementation */ @@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; + +static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = { + {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 
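The open-coded probe above replaces drm_get_pci_dev() with explicit alloc/enable/register steps, each paired with an unwind label so that a failure at step N undoes exactly steps 1..N-1. The generic shape of that error-handling ladder, with stand-in functions:

#include <stdio.h>
#include <stdlib.h>

static void *dev_alloc(void)       { return malloc(1); }
static int   enable_device(void)   { return 0; }
static int   register_device(void) { return -1; } /* force the unwind path */
static void  disable_device(void)  { puts("pci_disable_device"); }
static void  dev_unref(void *dev)  { puts("drm_dev_unref"); free(dev); }

static int probe(void)
{
	void *dev;
	int ret;

	dev = dev_alloc();
	if (!dev)
		return -1;

	ret = enable_device();
	if (ret)
		goto err_free;

	ret = register_device();
	if (ret)
		goto err_disable;

	return 0;

err_disable:
	disable_device(); /* undo enable_device() */
err_free:
	dev_unref(dev);   /* undo dev_alloc() */
	return ret;
}

int main(void)
{
	return probe() ? 1 : 0;
}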
0, NULL}, +}; #endif int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) + if (amdgpu_sriov_vf(adev)) + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); #else return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 902e6015abca..a57abc1a25fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). + * Returns 0 for success, -EINVAL for failure. */ -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); - return; + return -EINVAL; } t = offset / AMDGPU_GPU_PAGE_SIZE; @@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 94cb91cf93eb..621f739103a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, ttm_eu_backoff_reservation(&ticket, &list); } -static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) -{ - if (r == -EDEADLK) { - r = amdgpu_gpu_reset(adev); - if (!r) - r = -EAGAIN; - } - return r; -} - /* * GEM ioctls. 
*/ @@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED| AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) + return -EINVAL; + /* reject invalid gem domains */ if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_DOMAIN_OA)) + return -EINVAL; /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | @@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, size = size << AMDGPU_GWS_SHIFT; else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) size = size << AMDGPU_OA_SHIFT; - else { - r = -EINVAL; - goto error_unlock; - } + else + return -EINVAL; } size = roundup(size, PAGE_SIZE); @@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, args->in.domain_flags, kernel, &gobj); if (r) - goto error_unlock; + return r; r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) - goto error_unlock; + return r; memset(args, 0, sizeof(*args)); args->out.handle = handle; return 0; - -error_unlock: - r = amdgpu_gem_handle_lockup(adev, r); - return r; } int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, @@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_DOMAIN_CPU, 0, 0, &gobj); if (r) - goto handle_lockup; + return r; bo = gem_to_amdgpu_bo(gobj); bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; @@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) - goto handle_lockup; + return r; args->handle = handle; return 0; @@ -388,9 +369,6 @@ unlock_mmap_sem: release_object: drm_gem_object_unreference_unlocked(gobj); -handle_lockup: - r = amdgpu_gem_handle_lockup(adev, r); - return r; } @@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct amdgpu_device *adev = dev->dev_private; union drm_amdgpu_gem_wait_idle *args = data; struct drm_gem_object *gobj; struct amdgpu_bo *robj; @@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; drm_gem_object_unreference_unlocked(gobj); - r = amdgpu_gem_handle_lockup(adev, r); return r; } @@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t va_flags; int r = 0; - if (!adev->vm_manager.enabled) - return -ENOTTY; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { dev_err(&dev->pdev->dev, "va_address 0x%lX is in reserved area 0x%X\n", @@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } + if ((args->operation == AMDGPU_VA_OP_MAP) || + (args->operation == AMDGPU_VA_OP_REPLACE)) { + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; + } INIT_LIST_HEAD(&list); if ((args->operation != AMDGPU_VA_OP_CLEAR) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 19943356cca7..e26108aad3fe 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s p = next + 1; } } + +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, pipe, mec; + + /* policy for amdgpu compute queue ownership */ + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + queue = i % adev->gfx.mec.num_queue_per_pipe; + pipe = (i / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + mec = (i / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + + /* we've run out of HW */ + if (mec >= adev->gfx.mec.num_mec) + break; + + if (adev->gfx.mec.num_mec > 1) { + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) + set_bit(i, adev->gfx.mec.queue_bitmap); + } else { + /* policy: amdgpu owns all queues in the first pipe */ + if (mec == 0 && pipe == 0) + set_bit(i, adev->gfx.mec.queue_bitmap); + } + } + + /* update the number of active compute rings */ + adev->gfx.num_compute_rings = + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* If you hit this case and edited the policy, you probably just + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; +} + +static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue_bit; + int mec, pipe, queue; + + queue_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + + while (--queue_bit >= 0) { + if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + continue; + + amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + + /* Using pipes 2/3 from MEC 2 seems to cause problems */ + if (mec == 1 && pipe > 1) + continue; + + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + return 0; + } + + dev_err(adev->dev, "Failed to find a queue for KIQ\n"); + return -EINVAL; +} + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + int r = 0; + + mutex_init(&kiq->ring_mutex); + + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + + r = amdgpu_gfx_kiq_acquire(adev, ring); + if (r) + return r; + + ring->eop_gpu_addr = kiq->eop_gpu_addr; + sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_init(adev, ring, 1024, + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + if (r) + dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + + return r; +} + +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_ring_fini(ring); +} + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); +} + +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size) +{ + int r; + u32 *hpd; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, + &kiq->eop_gpu_addr, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "failed to create KIQ bo 
(%d).\n", r); + return r; + } + + memset(hpd, 0, hpd_size); + + r = amdgpu_bo_reserve(kiq->eop_obj, true); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); + amdgpu_bo_kunmap(kiq->eop_obj); + amdgpu_bo_unreserve(kiq->eop_obj); + + return 0; +} + +/* create MQD for each compute queue */ +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = &adev->gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + + return 0; +} + +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + kfree(adev->gfx.mec.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + + ring = &adev->gfx.kiq.ring; + kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e02044086445..1f279050d334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size); + +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); + +/** + * amdgpu_gfx_create_bitmask - create a bitmask + * + * @bit_width: length of the mask + * + * create a variable length bit mask. + * Returns the bitmask. 
+ */ +static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + int bit = 0; + + bit += mec * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + bit += pipe * adev->gfx.mec.num_queue_per_pipe; + bit += queue; + + return bit; +} + +static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} +static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6e4ae0d983c2..f774b3f497d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; + struct dma_fence *tmp = NULL; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } - if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + + if (ring->funcs->emit_pipeline_sync && job && + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + amdgpu_vm_need_pipeline_sync(ring, job))) { amdgpu_ring_emit_pipeline_sync(ring); + dma_fence_put(tmp); + } + + if (ring->funcs->insert_start) + ring->funcs->insert_start(ring); if (vm) { r = amdgpu_vm_flush(ring, job); @@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, status |= AMDGPU_HAVE_CTX_SWITCH; status |= job->preamble_status; - if (vm) - status |= AMDGPU_VM_DOMAIN; amdgpu_ring_emit_cntxcntl(ring, status); } @@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = false; } + if (ring->funcs->emit_tmz) + amdgpu_ring_emit_tmz(ring, false); + if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 && !(adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index a3da1a122fc8..3de8e74e5b3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -62,8 +62,9 @@ enum amdgpu_ih_clientid AMDGPU_IH_CLIENTID_MP0 = 0x1e, AMDGPU_IH_CLIENTID_MP1 = 0x1f, - AMDGPU_IH_CLIENTID_MAX + AMDGPU_IH_CLIENTID_MAX, + AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD }; #define AMDGPU_IH_CLIENTID_LEGACY 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index a6b7e367a860..62da6c5c6095 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, reset_work); - amdgpu_gpu_reset(adev); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gpu_reset(adev); } /* Disable *all* interrupts */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 7570f2439a11..3d641e10e6b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) job->base.sched->name, atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - amdgpu_gpu_reset(job->adev); + + if (amdgpu_sriov_vf(job->adev)) + amdgpu_sriov_gpu_reset(job->adev, job); + else + amdgpu_gpu_reset(job->adev); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); + amdgpu_sync_create(&(*job)->dep_sync); + amdgpu_sync_create(&(*job)->sched_sync); return 0; } @@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; - struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); + struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); + int r; + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence to sync (%d)\n", r); + } + if (!fence) + fence = amdgpu_sync_get_fence(&job->sync); while (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } - if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) - job->need_pipeline_sync = true; - return fence; } @@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; struct amdgpu_job *job; + struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); - if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); - + if (job->vm) + fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); + /* skip ib schedule when vram is lost */ + if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + DRM_ERROR("Skip scheduling IBs!\n"); + else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + if (r) + DRM_ERROR("Error scheduling IBs (%d)\n", r); + } /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96c341670782..b0b23101d1c8 100644 --- 
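On the dep_sync/sched_sync split introduced in amdgpu_job.c above: when a dependency's fence comes from the same scheduler, the scheduler can skip waiting on it ("optimized"), but the hardware then still needs an explicit pipeline sync before the IB runs. A loose userspace model of that decision, with fence contexts standing in for schedulers and all names hypothetical:

#include <stdbool.h>
#include <stdio.h>

struct fence { unsigned context; };
struct job {
	unsigned ring_context;
	const struct fence *sched_sync; /* recorded when a dep was optimized away */
};

static bool dependency_optimized(const struct job *job, const struct fence *f)
{
	return f && f->context == job->ring_context; /* same ring/scheduler */
}

static void add_dependency(struct job *job, const struct fence *f)
{
	if (dependency_optimized(job, f))
		job->sched_sync = f; /* remember it for emit time */
}

static void emit_ib(const struct job *job)
{
	if (job->sched_sync)
		puts("emit_pipeline_sync"); /* drain the pipe before the IB */
	puts("emit_ib");
}

int main(void)
{
	const struct fence same = { .context = 7 }, other = { .context = 9 };
	struct job a = { .ring_context = 7 }, b = { .ring_context = 7 };

	add_dependency(&a, &same);  /* same ring: pipeline sync required */
	add_dependency(&b, &other); /* cross ring: scheduler waits instead */
	emit_ib(&a);
	emit_ib(&b);
	return 0;
}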
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(dev->dev, + "SI support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(dev->dev, + "CIK support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; @@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_VCN_DEC: + type = AMD_IP_BLOCK_TYPE_VCN; + ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 16; + break; + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + for (i = 0; i < adev->vcn.num_enc_rings; i++) + ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 1; + break; default: return -EINVAL; } @@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; break; + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + break; default: return -EINVAL; } @@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_NUM_EVICTIONS: ui64 = atomic64_read(&adev->num_evictions); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; + case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS: + ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); + return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: ui64 = atomic64_read(&adev->vram_usage); return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; @@ -536,6 +594,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.cu_active_number = adev->gfx.cu_info.number; dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; + memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], + sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); dev_info.vram_type = adev->mc.vram_type; @@ -730,6 +790,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv) +{ + return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); +} + /** * amdgpu_driver_open_kms - drm callback for open * @@ -757,7 +823,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } - r = amdgpu_vm_init(adev, &fpriv->vm); + r = amdgpu_vm_init(adev, &fpriv->vm, + AMDGPU_VM_CONTEXT_GFX); if (r) { kfree(fpriv); goto out_suspend; @@ -782,6 +849,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: @@ -814,8 +882,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); - amdgpu_uvd_free_handles(adev, file_priv); - amdgpu_vce_free_handles(adev, file_priv); + if (adev->asic_type != CHIP_RAVEN) { + amdgpu_uvd_free_handles(adev, file_priv); + amdgpu_vce_free_handles(adev, file_priv); + } amdgpu_vm_bo_rmv(adev, fpriv->prt_va); @@ -945,50 +1015,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) amdgpu_irq_put(adev, &adev->crtc_irq, idx); } -/** - * amdgpu_get_vblank_timestamp_kms - get vblank timestamp - * - * @dev: drm dev pointer - * @crtc: crtc to get the timestamp for - * @max_error: max error - * @vblank_time: time value - * @flags: flags passed to the driver - * - * Gets the timestamp on the requested crtc based on the - * scanout position. (all asics). - * Returns postive status flags on success, negative error on failure. 
- */ -int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *crtc; - struct amdgpu_device *adev = dev->dev_private; - - if (pipe >= dev->num_crtcs) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - - /* Get associated drm_crtc: */ - crtc = &adev->mode_info.crtcs[pipe]->base; - if (!crtc) { - /* This can occur on driver load if some component fails to - * initialize completely and driver is unloaded */ - DRM_ERROR("Uninitialized crtc %d\n", pipe); - return -EINVAL; - } - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, - vblank_time, flags, - &crtc->hwmode); -} - const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dbd10618ec20..43a9d3aec6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -534,6 +534,9 @@ struct amdgpu_framebuffer { ((em) == ATOM_ENCODER_MODE_DP_MST)) /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +#define DRM_SCANOUTPOS_VALID (1 << 0) +#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) +#define DRM_SCANOUTPOS_ACCURATE (1 << 2) #define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 365883d7948d..8ee69652be8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return -EINVAL; /* hurrah the memory is not visible ! 
*/ + atomic64_inc(&adev->num_vram_cpu_page_faults); amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; for (i = 0; i < abo->placement.num_placement; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index f5ae871aa11c..b7e1c026c0c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_VEGA10: + case CHIP_RAVEN: adev->pp_enabled = true; if (amdgpu_create_pp_handle(adev)) return -EINVAL; @@ -187,6 +188,9 @@ static int amdgpu_pp_hw_fini(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->pp_enabled && adev->pm.dpm_enabled) + amdgpu_pm_sysfs_fini(adev); + if (adev->powerplay.ip_funcs->hw_fini) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); @@ -205,10 +209,9 @@ static void amdgpu_pp_late_fini(void *handle) adev->powerplay.ip_funcs->late_fini( adev->powerplay.pp_handle); - if (adev->pp_enabled && adev->pm.dpm_enabled) - amdgpu_pm_sysfs_fini(adev); - amd_powerplay_destroy(adev->powerplay.pp_handle); + if (adev->pp_enabled) + amd_powerplay_destroy(adev->powerplay.pp_handle); } static int amdgpu_pp_suspend(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ac5e92e5d59d..4083be61b328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,12 +24,13 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v3_1.h" +#include "psp_v10_0.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -61,6 +62,12 @@ static int psp_sw_init(void *handle) psp->compare_sram_data = psp_v3_1_compare_sram_data; psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; break; + case CHIP_RAVEN: + psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; + psp->ring_init = psp_v10_0_ring_init; + psp->cmd_submit = psp_v10_0_cmd_submit; + psp->compare_sram_data = psp_v10_0_compare_sram_data; + break; default: return -EINVAL; } @@ -145,8 +152,8 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; - cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; - cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); + cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); + cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; } @@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; + /* If the PSP version doesn't match the ASD version, ASD loading will fail; + * add a workaround to bypass it for SR-IOV for now.
+ * TODO: add version check to make it common + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -319,14 +333,11 @@ static int psp_load_fw(struct amdgpu_device *adev) { int ret; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) + psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!psp->cmd) return -ENOMEM; - psp->cmd = cmd; - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, @@ -365,8 +376,6 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ret) goto failed_mem; - kfree(cmd); - return 0; failed_mem: @@ -376,7 +385,8 @@ failed_mem1: amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: - kfree(cmd); + kfree(psp->cmd); + psp->cmd = NULL; return ret; } @@ -436,6 +446,9 @@ static int psp_hw_fini(void *handle) amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); + kfree(psp->cmd); + psp->cmd = NULL; + return 0; } @@ -542,3 +555,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 0301e4e0b297..1a1c8b469f93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); +extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c new file mode 100644 index 000000000000..befc09b68543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -0,0 +1,299 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Andres Rodriguez + */ + +#include "amdgpu.h" +#include "amdgpu_ring.h" + +static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper, + int hw_ip) +{ + if (!mapper) + return -EINVAL; + + if (hw_ip > AMDGPU_MAX_IP_NUM) + return -EINVAL; + + mapper->hw_ip = hw_ip; + mutex_init(&mapper->lock); + + memset(mapper->queue_map, 0, sizeof(mapper->queue_map)); + + return 0; +} + +static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper, + int ring) +{ + return mapper->queue_map[ring]; +} + +static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, + int ring, struct amdgpu_ring *pring) +{ + if (WARN_ON(mapper->queue_map[ring])) { + DRM_ERROR("Unexpected ring re-map\n"); + return -EINVAL; + } + + mapper->queue_map[ring] = pring; + + return 0; +} + +static int amdgpu_identity_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int ring, + struct amdgpu_ring **out_ring) +{ + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + *out_ring = &adev->gfx.gfx_ring[ring]; + break; + case AMDGPU_HW_IP_COMPUTE: + *out_ring = &adev->gfx.compute_ring[ring]; + break; + case AMDGPU_HW_IP_DMA: + *out_ring = &adev->sdma.instance[ring].ring; + break; + case AMDGPU_HW_IP_UVD: + *out_ring = &adev->uvd.ring; + break; + case AMDGPU_HW_IP_VCE: + *out_ring = &adev->vce.ring[ring]; + break; + case AMDGPU_HW_IP_UVD_ENC: + *out_ring = &adev->uvd.ring_enc[ring]; + break; + case AMDGPU_HW_IP_VCN_DEC: + *out_ring = &adev->vcn.ring_dec; + break; + case AMDGPU_HW_IP_VCN_ENC: + *out_ring = &adev->vcn.ring_enc[ring]; + break; + default: + *out_ring = NULL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + return -EINVAL; + } + + return amdgpu_update_cached_map(mapper, ring, *out_ring); +} + +static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) +{ + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + return AMDGPU_RING_TYPE_GFX; + case AMDGPU_HW_IP_COMPUTE: + return AMDGPU_RING_TYPE_COMPUTE; + case AMDGPU_HW_IP_DMA: + return AMDGPU_RING_TYPE_SDMA; + case AMDGPU_HW_IP_UVD: + return AMDGPU_RING_TYPE_UVD; + case AMDGPU_HW_IP_VCE: + return AMDGPU_RING_TYPE_VCE; + default: + DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); + return -1; + } +} + +static int amdgpu_lru_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int user_ring, + struct amdgpu_ring **out_ring) +{ + int r, i, j; + int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); + int ring_blacklist[AMDGPU_MAX_RINGS]; + struct amdgpu_ring *ring; + + /* 0 is a valid ring index, so initialize to -1 */ + memset(ring_blacklist, 0xff, sizeof(ring_blacklist)); + + for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = mapper->queue_map[i]; + if (ring) + ring_blacklist[j++] = ring->idx; + } + + r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, + j, out_ring); + if (r) + return r; + + return amdgpu_update_cached_map(mapper, user_ring, *out_ring); +} + +/** + * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * Initialize the selected @mgr (all asics). + * + * Returns 0 on success, error on failure.
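 + * + * Example (hypothetical call site, error handling elided; the manager + * would typically live in a per-file struct such as fpriv->queue_mgr): + * + *	r = amdgpu_queue_mgr_init(adev, &fpriv->queue_mgr);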
+ */ +int amdgpu_queue_mgr_init(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + int i, r; + + if (!adev || !mgr) + return -EINVAL; + + memset(mgr, 0, sizeof(*mgr)); + + for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { + r = amdgpu_queue_mapper_init(&mgr->mapper[i], i); + if (r) + return r; + } + + return 0; +} + +/** + * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * De-initialize the selected @mgr (all asics). + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + return 0; +} + +/** + * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * @hw_ip: HW IP enum + * @instance: HW instance + * @ring: user ring id + * @out_ring: pointer to mapped amdgpu_ring + * + * Map a userspace ring id to an appropriate kernel ring. Different + * policies are configurable at a HW IP level. + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_map(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr, + int hw_ip, int instance, int ring, + struct amdgpu_ring **out_ring) +{ + int r, ip_num_rings; + struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip]; + + if (!adev || !mgr || !out_ring) + return -EINVAL; + + if (hw_ip >= AMDGPU_MAX_IP_NUM) + return -EINVAL; + + if (ring >= AMDGPU_MAX_RINGS) + return -EINVAL; + + /* Right now all IPs have only one instance - multiple rings. */ + if (instance != 0) { + DRM_ERROR("invalid ip instance: %d\n", instance); + return -EINVAL; + } + + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + ip_num_rings = adev->gfx.num_gfx_rings; + break; + case AMDGPU_HW_IP_COMPUTE: + ip_num_rings = adev->gfx.num_compute_rings; + break; + case AMDGPU_HW_IP_DMA: + ip_num_rings = adev->sdma.num_instances; + break; + case AMDGPU_HW_IP_UVD: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCE: + ip_num_rings = adev->vce.num_rings; + break; + case AMDGPU_HW_IP_UVD_ENC: + ip_num_rings = adev->uvd.num_enc_rings; + break; + case AMDGPU_HW_IP_VCN_DEC: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_ENC: + ip_num_rings = adev->vcn.num_enc_rings; + break; + default: + DRM_ERROR("unknown ip type: %d\n", hw_ip); + return -EINVAL; + } + + if (ring >= ip_num_rings) { + DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n", + ring, ip_num_rings, hw_ip); + return -EINVAL; + } + + mutex_lock(&mapper->lock); + + *out_ring = amdgpu_get_cached_map(mapper, ring); + if (*out_ring) { + /* cache hit */ + r = 0; + goto out_unlock; + } + + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_UVD: + case AMDGPU_HW_IP_VCE: + case AMDGPU_HW_IP_UVD_ENC: + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + r = amdgpu_identity_map(adev, mapper, ring, out_ring); + break; + case AMDGPU_HW_IP_DMA: + case AMDGPU_HW_IP_COMPUTE: + r = amdgpu_lru_map(adev, mapper, ring, out_ring); + break; + default: + *out_ring = NULL; + r = -EINVAL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + } + +out_unlock: + mutex_unlock(&mapper->lock); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a85db0c0bc3..75165e07b1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct
amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); + + amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + INIT_LIST_HEAD(&ring->lru_list); + amdgpu_ring_lru_touch(adev, ring); if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } + return 0; } @@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } +static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + /* list_move_tail handles the case where ring isn't part of the list */ + list_move_tail(&ring->lru_list, &adev->ring_lru_list); +} + +static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, + int *blacklist, int num_blacklist) +{ + int i; + + for (i = 0; i < num_blacklist; i++) { + if (ring->idx == blacklist[i]) + return true; + } + + return false; +} + +/** + * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block + * + * @adev: amdgpu_device pointer + * @type: amdgpu_ring_type enum + * @blacklist: blacklisted ring ids array + * @num_blacklist: number of entries in @blacklist + * @ring: output ring + * + * Retrieve the amdgpu_ring structure for the least recently used ring of + * a specific IP block (all asics). + * Returns 0 on success, error on failure. + */ +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring **ring) +{ + struct amdgpu_ring *entry; + + /* List is sorted in LRU order, find first entry corresponding + * to the desired HW IP */ + *ring = NULL; + spin_lock(&adev->ring_lru_list_lock); + list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { + if (entry->funcs->type != type) + continue; + + if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) + continue; + + *ring = entry; + amdgpu_ring_lru_touch_locked(adev, *ring); + break; + } + spin_unlock(&adev->ring_lru_list_lock); + + if (!*ring) { + DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); + return -EINVAL; + } + + return 0; +} + +/** + * amdgpu_ring_lru_touch - mark a ring as recently being used + * + * @adev: amdgpu_device pointer + * @ring: ring to touch + * + * Move @ring to the tail of the lru list + */ +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + spin_lock(&adev->ring_lru_list_lock); + amdgpu_ring_lru_touch_locked(adev, ring); + spin_unlock(&adev->ring_lru_list_lock); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 944443c5b90a..bc8dec992f73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -47,7 +47,9 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_UVD, AMDGPU_RING_TYPE_VCE, AMDGPU_RING_TYPE_KIQ, - AMDGPU_RING_TYPE_UVD_ENC + AMDGPU_RING_TYPE_UVD_ENC, + AMDGPU_RING_TYPE_VCN_DEC, + AMDGPU_RING_TYPE_VCN_ENC }; struct amdgpu_device; @@ -76,6 +78,7 @@ struct amdgpu_fence_driver { int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); @@ -130,6 +133,7 @@ struct amdgpu_ring_funcs { int 
(*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); @@ -142,6 +146,7 @@ struct amdgpu_ring_funcs { void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + void (*emit_tmz)(struct amdgpu_ring *ring, bool start); }; struct amdgpu_ring { @@ -149,6 +154,7 @@ struct amdgpu_ring { const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct amd_gpu_scheduler sched; + struct list_head lru_list; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; @@ -180,6 +186,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; + bool has_compute_vm_bug; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring **ring); +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ed814e6d0207..a6899180b265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) return NULL; } +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + r = dma_fence_wait(e->fence, intr); + if (r) + return r; + + hash_del(&e->node); + dma_fence_put(e->fence); + kmem_cache_free(amdgpu_sync_slab, e); + } + + return 0; +} + /** * amdgpu_sync_free - free the sync object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 605be266e07f..dc7687993317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db0230e45c6..c9b131b13ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -29,11 +29,11 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include 
<drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_page_alloc.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include <linux/seq_file.h> @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) return r; } + spin_lock(&gtt->adev->gtt_list_lock); flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) if (r) { DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", ttm->num_pages, gtt->offset); - return r; + goto error_gart_bind; } - spin_lock(&gtt->adev->gtt_list_lock); + list_add_tail(&gtt->list, &gtt->adev->gtt_list); +error_gart_bind: spin_unlock(&gtt->adev->gtt_list_lock); - return 0; + return r; } int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - if (gtt->adev->gart.ready) - amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - spin_lock(&gtt->adev->gtt_list_lock); + r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + if (r) { + DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + goto error_unbind; + } list_del_init(&gtt->list); +error_unbind: spin_unlock(&gtt->adev->gtt_list_lock); - - return 0; + return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) @@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); - r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, + r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, @@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (*pos >= adev->mc.mc_vram_size) + return -ENXIO; + while (size) { unsigned long flags; uint32_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index dfd1c98efa7c..4f50eeb65855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) } } +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) +{ + uint16_t version_major = le16_to_cpu(hdr->header_version_major); + uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + + DRM_DEBUG("GPU_INFO\n"); + amdgpu_ucode_print_common_hdr(hdr); + + if (version_major == 1) { + const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr = + container_of(hdr, struct gpu_info_firmware_header_v1_0, header); + + DRM_DEBUG("version_major: %u\n", + le16_to_cpu(gpu_info_hdr->version_major)); + DRM_DEBUG("version_minor: %u\n", + le16_to_cpu(gpu_info_hdr->version_minor)); + } else { + DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor); + } +} +
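The print helper above only dumps the gpu_info header; as a rough sketch (not part of this patch, with parse_gpu_info and the max_shader_engines assignment purely illustrative), a consumer could reach the v1.0 payload behind the common header like this, assuming the payload starts at ucode_array_offset_bytes as it does for the other amdgpu ucode types:

	static int parse_gpu_info(struct amdgpu_device *adev,
				  const struct firmware *fw)
	{
		const struct common_firmware_header *hdr =
			(const struct common_firmware_header *)fw->data;
		const struct gpu_info_firmware_v1_0 *gpu_info;

		amdgpu_ucode_print_gpu_info_hdr(hdr);

		if (le16_to_cpu(hdr->header_version_major) != 1)
			return -EINVAL;

		/* the payload follows the header at ucode_array_offset_bytes */
		gpu_info = (const struct gpu_info_firmware_v1_0 *)
			(fw->data + le32_to_cpu(hdr->ucode_array_offset_bytes));

		/* all fields are little-endian on disk, swap before use */
		adev->gfx.config.max_shader_engines =
			le32_to_cpu(gpu_info->gc_num_se);

		return 0;
	}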
int amdgpu_ucode_validate(const struct firmware *fw) { const struct common_firmware_header *hdr = @@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_RAVEN: +#if 0 + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; +#else + return AMDGPU_FW_LOAD_DIRECT; +#endif default: DRM_ERROR("Unknow firmware load type\n"); } @@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - 0, NULL, NULL, bo); + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); goto failed; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 758f03a1770d..30b5500dc152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 { uint32_t digest_size; }; +/* gpu info payload */ +struct gpu_info_firmware_v1_0 { + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; +}; + +/* version_major=1, version_minor=0 */ +struct gpu_info_firmware_header_v1_0 { + struct common_firmware_header header; + uint16_t version_major; /* version */ + uint16_t version_minor; /* version */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; @@ -124,6 +150,7 @@ union amdgpu_firmware_header { struct rlc_firmware_header_v2_0 rlc_v2_0; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; + struct gpu_info_firmware_header_v1_0 gpu_info; uint8_t raw[0x100]; }; @@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 735c38d7db0d..b692ad402252 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | (binary_id << 8)); - /* allocate firmware, stack and heap BO */ - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->vce.vcpu_bo); + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo, + 
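/* amdgpu_bo_create_kernel allocates, pins and (when a CPU address pointer is passed) kmaps in a single call, replacing the open-coded reserve/pin sequence removed below */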
&adev->vce.gpu_addr, &adev->vce.cpu_addr); if (r) { dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); return r; } - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); - return r; - } - - r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, - &adev->vce.gpu_addr); - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); - return r; - } - - ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, @@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); - amdgpu_bo_unref(&adev->vce.vcpu_bo); + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, + (void **)&adev->vce.cpu_addr); for (i = 0; i < adev->vce.num_rings; i++) amdgpu_ring_fini(&adev->vce.ring[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0a7f18c461e4..5ce54cde472d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,6 +33,8 @@ struct amdgpu_vce { struct amdgpu_bo *vcpu_bo; uint64_t gpu_addr; + void *cpu_addr; + void *saved_bo; unsigned fw_version; unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c new file mode 100644 index 000000000000..09190fadd228 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -0,0 +1,654 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "amdgpu.h" +#include "amdgpu_pm.h" +#include "amdgpu_vcn.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "raven1/VCN/vcn_1_0_offset.h" + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +/* Firmware Names */ +#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" + +MODULE_FIRMWARE(FIRMWARE_RAVEN); + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work); + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; + unsigned long bo_size; + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned version_major, version_minor, family_id; + int r; + + INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + + switch (adev->asic_type) { + case CHIP_RAVEN: + fw_name = FIRMWARE_RAVEN; + break; + default: + return -EINVAL; + } + + r = request_firmware(&adev->vcn.fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + r = amdgpu_ucode_validate(adev->vcn.fw); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n", + fw_name); + release_firmware(adev->vcn.fw); + adev->vcn.fw = NULL; + return r; + } + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + + AMDGPU_VCN_SESSION_SIZE * 40; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } + + ring = &adev->vcn.ring_dec; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN dec run queue.\n"); + return r; + } + + ring = &adev->vcn.ring_enc[0]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN enc run queue.\n"); + return r; + } + + return 0; +} + +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +{ + int i; + + kfree(adev->vcn.saved_bo); + + amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + + amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); + + amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, + (void **)&adev->vcn.cpu_addr); + + amdgpu_ring_fini(&adev->vcn.ring_dec); + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + + release_firmware(adev->vcn.fw); + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return 0; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + adev->vcn.saved_bo = kmalloc(size, 
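/* plain kernel memory for a CPU-side copy; the VCPU BO contents are saved below via memcpy_fromio and written back by amdgpu_vcn_resume() */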
GFP_KERNEL); + if (!adev->vcn.saved_bo) + return -ENOMEM; + + memcpy_fromio(adev->vcn.saved_bo, ptr, size); + + return 0; +} + +int amdgpu_vcn_resume(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + if (adev->vcn.saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.saved_bo, size); + kfree(adev->vcn.saved_bo); + adev->vcn.saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + memset_io(ptr, 0, size); + } + + return 0; +} + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + + if (fences == 0) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + } + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, true); + } else { + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); + } + } +} + +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); +} + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + bool direct, struct dma_fence **fence) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + struct amdgpu_device *adev = ring->adev; + uint64_t addr; + int i, r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto err; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->ptr[0] = 
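/* 16-dword decode message submission: GPCOM DATA0/DATA1 carry the low/high halves of the message address, CMD kicks off processing, and the remaining dwords are NOP padding */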
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[1] = addr; + ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[3] = addr >> 32; + ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[5] = 0; + for (i = 6; i < 16; i += 2) { + ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err_free; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; + } + + ttm_eu_fence_buffer_objects(&ticket, &head, f); + + if (fence) + *fence = dma_fence_get(f); + amdgpu_bo_unref(&bo); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000038); + msg[2] = cpu_to_le32(0x00000001); + msg[3] = cpu_to_le32(0x00000000); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + msg[6] = cpu_to_le32(0x00000001); + msg[7] = cpu_to_le32(0x00000028); + msg[8] = cpu_to_le32(0x00000010); + msg[9] = cpu_to_le32(0x00000000); + msg[10] = cpu_to_le32(0x00000007); + msg[11] = cpu_to_le32(0x00000000); + msg[12] = cpu_to_le32(0x00000780); + msg[13] = cpu_to_le32(0x00000440); + for (i = 14; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); +} + +static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000018); + msg[2] = cpu_to_le32(0x00000000); + msg[3] = cpu_to_le32(0x00000002); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + for (i = 6; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); +} + +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto 
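/* no fence was requested for the create msg; the destroy msg below supplies the fence the test actually waits on */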
error; + } + + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } + + dma_fence_put(fence); + +error: + return r; +} + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr = amdgpu_ring_get_rptr(ring); + unsigned i; + int r; + + r = amdgpu_ring_alloc(ring, 16); + if (r) { + DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, VCN_ENC_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000002; /* op close 
session */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + long r; + + r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } +error: + dma_fence_put(fence); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h new file mode 100644 index 000000000000..d50ba0657854 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -0,0 +1,77 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_VCN_H__ +#define __AMDGPU_VCN_H__ + +#define AMDGPU_VCN_STACK_SIZE (200*1024) +#define AMDGPU_VCN_HEAP_SIZE (256*1024) +#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_FIRMWARE_OFFSET 256 +#define AMDGPU_VCN_MAX_ENC_RINGS 3 + +#define VCN_DEC_CMD_FENCE 0x00000000 +#define VCN_DEC_CMD_TRAP 0x00000001 +#define VCN_DEC_CMD_WRITE_REG 0x00000004 +#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006 +#define VCN_DEC_CMD_PACKET_START 0x0000000a +#define VCN_DEC_CMD_PACKET_END 0x0000000b + +#define VCN_ENC_CMD_NO_OP 0x00000000 +#define VCN_ENC_CMD_END 0x00000001 +#define VCN_ENC_CMD_IB 0x00000002 +#define VCN_ENC_CMD_FENCE 0x00000003 +#define VCN_ENC_CMD_TRAP 0x00000004 +#define VCN_ENC_CMD_REG_WRITE 0x0000000b +#define VCN_ENC_CMD_REG_WAIT 0x0000000c + +struct amdgpu_vcn { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_irq_src irq; + struct amd_sched_entity entity_dec; + struct amd_sched_entity entity_enc; + unsigned num_enc_rings; +}; + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); +int amdgpu_vcn_suspend(struct amdgpu_device *adev); +int amdgpu_vcn_resume(struct amdgpu_device *adev); +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6bf5cea294f2..8a081e162d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" +#define MAX_KIQ_REG_WAIT 100000 int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; + adev->cg_flags = 0; + adev->pg_flags = 0; - mutex_init(&adev->virt.lock_kiq); mutex_init(&adev->virt.lock_reset); } @@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); dma_fence_put(f); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return ~0; + } val = adev->wb.wb[adev->virt.reg_val_offs]; @@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - 
mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + if (r < 1) DRM_ERROR("wait for kiq fence error: %ld.\n", r); dma_fence_put(f); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a8ed162cc0bc..9e1062edb76e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -52,7 +52,6 @@ struct amdgpu_virt { uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; - struct mutex lock_kiq; struct mutex lock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; @@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..5795f81369f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -79,6 +79,12 @@ struct amdgpu_pte_update_params { uint64_t flags); /* indicate update pt or its shadow */ bool shadow; + /* The next two are used during VM update by CPU + * DMA addresses to use for mapping + * Kernel pointer of PD/PT BO that needs to be updated + */ + dma_addr_t *pages_addr; + void *kptr; }; /* Helper to disable partial resident texture feature from a fence callback */ @@ -275,12 +281,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, adev->vm_manager.block_size; unsigned pt_idx, from, to; int r; + u64 flags; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); - parent->entries = drm_calloc_large(num_entries, - sizeof(struct amdgpu_vm_pt)); + parent->entries = kvmalloc_array(num_entries, + sizeof(struct amdgpu_vm_pt), + GFP_KERNEL | __GFP_ZERO); if (!parent->entries) return -ENOMEM; memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); @@ -299,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -310,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, resv, &pt); if (r) return r; @@ -391,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, 
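/* a VM can hold at most one reserved VMID per vmhub, and the manager caps the total via reserved_vmid_num */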
unsigned vmhub) +{ + return !!vm->reserved_vmid[vmhub]; +} + +/* id_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = false; + + flushed = id->flushed_updates; + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* prevent one context from being starved by another */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + return r; + } + } + + /* Good, we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->client_id); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -415,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; + mutex_lock(&id_mgr->lock); + if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { + r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) + if (!fences) { + mutex_unlock(&id_mgr->lock); return -ENOMEM; - - mutex_lock(&id_mgr->lock); - + } /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, &id_mgr->ids_lru, list) { @@ -521,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id->pd_gpu_addr = job->vm_pd_addr; dma_fence_put(id->flushed_updates); id->flushed_updates = dma_fence_get(updates); - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); atomic64_set(&id->owner, vm->client_id); needs_flush: @@ -540,40 +622,118 @@ error: return r; } -static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct
amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr; + struct amdgpu_vm_id *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over limit of reserved vmids\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first entry VMID */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +/** + * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug + * + * @adev: amdgpu_device pointer + */ +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) { - struct amdgpu_device *adev = ring->adev; const struct amdgpu_ip_block *ip_block; + bool has_compute_vm_bug; + struct amdgpu_ring *ring; + int i; - if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) - /* only compute rings */ - return false; + has_compute_vm_bug = false; ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); - if (!ip_block) - return false; + if (ip_block) { + /* Compute has a VM bug for GFX version < 7. + Compute has a VM bug for GFX 8 MEC firmware version < 673. */ + if (ip_block->version->major <= 7) + has_compute_vm_bug = true; + else if (ip_block->version->major == 8) + if (adev->gfx.mec_fw_version < 673) + has_compute_vm_bug = true; + } - if (ip_block->version->major <= 7) { - /* gfx7 has no workaround */ - return true; - } else if (ip_block->version->major == 8) { - if (adev->gfx.mec_fw_version >= 673) - /* gfx8 is fixed in MEC firmware 673 */ - return false; + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + /* only compute rings */ + ring->has_compute_vm_bug = has_compute_vm_bug; else - return true; + ring->has_compute_vm_bug = false; } - return false; } -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job) { - u64 addr = mc_addr; + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id; + bool gds_switch_needed; + bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; + + if (job->vm_id == 0) + return false; + id = &id_mgr->ids[job->vm_id]; + gds_switch_needed = ring->funcs->emit_gds_switch && ( + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); - if (adev->gart.gart_funcs->adjust_mc_addr) - addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); + if (amdgpu_vm_had_gpu_reset(adev, id)) + return true; - return addr; + return vm_flush_needed || gds_switch_needed; +} + +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) +{ + return (adev->mc.real_vram_size == adev->mc.visible_vram_size); } /** @@ -598,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size != job->gws_size || id->oa_base != job->oa_base || id->oa_size != job->oa_size); - bool vm_flush_needed = job->vm_needs_flush || - 
amdgpu_vm_ring_has_compute_vm_bug(ring); + bool vm_flush_needed = job->vm_needs_flush; unsigned patch_offset = 0; int r; @@ -614,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) - amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); + trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -631,6 +786,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); id->last_flush = fence; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } @@ -805,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + for (i = 0; i < count; i++) { + value = params->pages_addr ? + amdgpu_vm_map_gart(params->pages_addr, addr) : + addr; + amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } + + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(params->adev, 0); +} + +static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + struct amdgpu_sync sync; + int r; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + r = amdgpu_sync_wait(&sync, true); + amdgpu_sync_free(&sync); + + return r; +} + /* * amdgpu_vm_update_level - update a single level in the hierarchy * @@ -821,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, unsigned level) { struct amdgpu_bo *shadow; - struct amdgpu_ring *ring; - uint64_t pd_addr, shadow_addr; + struct amdgpu_ring *ring = NULL; + uint64_t pd_addr, shadow_addr = 0; uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw; + unsigned count = 0, pt_idx, ndw = 0; struct amdgpu_job *job; struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; @@ -834,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (!parent->entries) return 0; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. 
*/ - ndw = 64; + memset(&params, 0, sizeof(params)); + params.adev = adev; + shadow = parent->bo->shadow; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; + WARN_ON(vm->use_cpu_for_update && shadow); + if (vm->use_cpu_for_update && !shadow) { + r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr); + if (r) + return r; + r = amdgpu_vm_bo_wait(adev, parent->bo); + if (unlikely(r)) { + amdgpu_bo_kunmap(parent->bo); + return r; + } + params.func = amdgpu_vm_cpu_set_ptes; + } else { + if (shadow) { + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + /* padding, etc. */ + ndw = 64; - shadow = parent->bo->shadow; - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); + ndw *= 2; + } else { + shadow_addr = 0; + } + + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } - memset(&params, 0, sizeof(params)); - params.adev = adev; - params.ib = &job->ibs[0]; /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -881,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_gart_get_vm_pde(adev, pt); if (parent->entries[pt_idx].addr == pt) continue; @@ -892,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - uint64_t pt_addr = - amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (shadow) - amdgpu_vm_do_set_ptes(&params, - last_shadow, - pt_addr, count, - incr, - AMDGPU_PTE_VALID); - - amdgpu_vm_do_set_ptes(&params, last_pde, - pt_addr, count, incr, - AMDGPU_PTE_VALID); + params.func(&params, + last_shadow, + last_pt, count, + incr, + AMDGPU_PTE_VALID); + + params.func(&params, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -917,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } if (count) { - uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (vm->root.bo->shadow) - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_shadow, last_pt, + count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); } - if (params.ib->length_dw == 0) { + if (params.func == amdgpu_vm_cpu_set_ptes) + amdgpu_bo_kunmap(parent->bo); + else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { amdgpu_ring_pad_ib(ring, params.ib); @@ -971,6 +1192,32 @@ error_free: } /* + * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * + * @parent: parent PD + * + * Mark all PD levels as invalid after an error. + */ +static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) +{ + unsigned pt_idx; + + /* + * Recurse into the subdirectories. This recursion is harmless because + * we only have a maximum of 5 layers. 
+ */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + + if (!entry->bo) + continue; + + entry->addr = ~0ULL; + amdgpu_vm_invalidate_level(entry); + } +} + +/* * amdgpu_vm_update_directories - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -982,7 +1229,13 @@ error_free: int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - return amdgpu_vm_update_level(adev, vm, &vm->root, 0); + int r; + + r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); + if (r) + amdgpu_vm_invalidate_level(&vm->root); + + return r; } /** @@ -1022,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, * @flags: mapping flags * * Update the page tables in the range @start - @end. + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; - uint64_t cur_pe_start, cur_nptes, cur_dst; - uint64_t addr; /* next GPU address to be updated */ + uint64_t addr, pe_start; struct amdgpu_bo *pt; - unsigned nptes; /* next number of ptes to be updated */ - uint64_t next_pe_start; - - /* initialize the variables */ - addr = start; - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } - - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - cur_pe_start = amdgpu_bo_gpu_offset(pt); - cur_pe_start += (addr & mask) * 8; - cur_nptes = nptes; - cur_dst = dst; + unsigned nptes; + int r; + bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; /* walk over the address space and update the page tables */ - while (addr < end) { + for (addr = start; addr < end; addr += nptes) { pt = amdgpu_vm_get_pt(params, addr); if (!pt) { pr_err("PT not found, aborting update_ptes\n"); - return; + return -EINVAL; } if (params->shadow) { + if (WARN_ONCE(use_cpu_update, + "CPU VM update doesn't support shadow pages")) + return 0; + + if (!pt->shadow) - return; + return 0; pt = pt->shadow; } @@ -1082,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - next_pe_start = amdgpu_bo_gpu_offset(pt); - next_pe_start += (addr & mask) * 8; + if (use_cpu_update) { + r = amdgpu_bo_kmap(pt, (void *)&pe_start); + if (r) + return r; + } else + pe_start = amdgpu_bo_gpu_offset(pt); - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && - ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { - /* The next ptb is consecutive to current ptb. - * Don't call the update function now. - * Will update two ptbs together in future. 
- */ - cur_nptes += nptes; - } else { - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + pe_start += (addr & mask) * 8; - cur_pe_start = next_pe_start; - cur_nptes = nptes; - cur_dst = dst; - } + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); - /* for next ptb*/ - addr += nptes; dst += nptes * AMDGPU_GPU_PAGE_SIZE; + + if (use_cpu_update) + amdgpu_bo_kunmap(pt); } - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + return 0; } /* @@ -1119,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { + int r; + /** * The MC L1 TLB supports variable sized pages, based on a fragment * field in the PTE. When this field is set to a non-zero value, page * granularity is increased from 4KB to (1 << (12 + frag)). The PTE * flags are considered valid for all PTEs within the fragment range * and corresponding mappings are assumed to be physically contiguous. * * The L1 TLB can store a single PTE for the whole fragment, * significantly increasing the space available for translation * caching. This leads to large improvements in throughput when the * TLB is under pressure. * * The L2 TLB distributes small and large fragments into two * asymmetric partitions. The large fragment cache is significantly * larger. Thus, we try to use large fragments wherever possible. * Userspace can support this by aligning virtual base address and * allocation size to the fragment size. */ /* SI and newer are optimized for 64KB */ uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); /* system pages are not contiguous */ if (params->src || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) { - - amdgpu_vm_update_ptes(params, start, end, dst, flags); - return; - } + (frag_start >= frag_end)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); /* handle the 4K area at the beginning */ if (start != frag_start) { - amdgpu_vm_update_ptes(params, start, frag_start, - dst, flags); + r = amdgpu_vm_update_ptes(params, start, frag_start, + dst, flags); + if (r) + return r; dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, - flags | frag_flags); + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, + flags | frag_flags); + if (r) + return r; /* handle the 4K area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); + r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); } + return r; } /** @@ -1215,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.src = src; + if (vm->use_cpu_for_update) { + /* params.src is used as a flag to indicate system memory */ + if (pages_addr) + params.src = ~0; + + /* Wait for PT BOs to be free. PTs share the same resv. 
object + * as the root PD BO + */ + r = amdgpu_vm_bo_wait(adev, vm->root.bo); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + params.pages_addr = pages_addr; + params.shadow = false; + return amdgpu_vm_frag_ptes(&params, start, last + 1, + addr, flags); + } + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* sync to everything on unmapping */ @@ -1294,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; params.shadow = true; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + if (r) + goto error_free; params.shadow = false; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + if (r) + goto error_free; amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); @@ -2137,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm: requested vm + * @vm_context: Indicates whether it is a GFX or Compute context * * Init @vm fields. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int r; + int r, i; + u64 flags; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); @@ -2167,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + else + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ? 
"CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); vm->last_dir_update = NULL; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, NULL, &vm->root.bo); if (r) goto error_free_sched_entity; @@ -2219,7 +2492,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) for (i = 0; i <= level->last_entry_used; i++) amdgpu_vm_free_levels(&level->entries[i]); - drm_free_large(level->entries); + kvfree(level->entries); } /** @@ -2235,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + int i; amd_sched_entity_fini(vm->entity.sched, &vm->entity); @@ -2258,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(&vm->root); dma_fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); } /** @@ -2277,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -2295,6 +2572,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); + + /* If not overridden by the user, by default, only in large BAR systems + * Compute VM tables will be updated by CPU + */ +#ifdef CONFIG_X86_64 + if (amdgpu_vm_update_mode == -1) { + if (amdgpu_vm_is_large_bar(adev)) + adev->vm_manager.vm_update_mode = + AMDGPU_VM_USE_CPU_FOR_COMPUTE; + else + adev->vm_manager.vm_update_mode = 0; + } else + adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; +#else + adev->vm_manager.vm_update_mode = 0; +#endif + } /** @@ -2322,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + int r; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, + AMDGPU_GFXHUB); + if (r) + return r; + break; + case AMDGPU_VM_OP_UNRESERVE_VMID: + amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index e1d951ece433..936f158bc5ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry; /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) +/* max vmids dedicated for process */ +#define 
AMDGPU_VM_MAX_RESERVED_VMID 1 + +#define AMDGPU_VM_CONTEXT_GFX 0 +#define AMDGPU_VM_CONTEXT_COMPUTE 1 + +/* See vm_update_mode */ +#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) +#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) + struct amdgpu_vm_pt { struct amdgpu_bo *bo; @@ -123,8 +133,13 @@ struct amdgpu_vm { /* client id */ u64 client_id; + /* dedicated to vm */ + struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* each VM will map on CSA */ struct amdgpu_bo_va *csa_bo_va; + + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ + bool use_cpu_for_update; }; struct amdgpu_vm_id { @@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + atomic_t reserved_vmid_num; }; struct amdgpu_vm_manager { @@ -168,8 +184,6 @@ struct amdgpu_vm_manager { uint32_t block_size; /* vram base address for page table entry */ u64 vram_base_offset; - /* is vm enabled? */ - bool enabled; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; @@ -181,11 +195,18 @@ struct amdgpu_vm_manager { /* partial resident texture handling */ spinlock_t prt_lock; atomic_t num_prt_users; + + /* controls how VM page tables are updated for Graphics and Compute. + * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU + * BIT1[= 0] Compute updated by SDMA [= 1] by CPU + */ + int vm_update_mode; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, @@ -239,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job); +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index ec93714e4524..cb508a211b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c index 7eb9069db8e3..b8ba51e045b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 9d33e5641419..37a499ab30eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, 
} static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { - {mmGRBM_STATUS, false}, - {mmGB_ADDR_CONFIG, false}, - {mmMC_ARB_RAMCFG, false}, - {mmGB_TILE_MODE0, false}, - {mmGB_TILE_MODE1, false}, - {mmGB_TILE_MODE2, false}, - {mmGB_TILE_MODE3, false}, - {mmGB_TILE_MODE4, false}, - {mmGB_TILE_MODE5, false}, - {mmGB_TILE_MODE6, false}, - {mmGB_TILE_MODE7, false}, - {mmGB_TILE_MODE8, false}, - {mmGB_TILE_MODE9, false}, - {mmGB_TILE_MODE10, false}, - {mmGB_TILE_MODE11, false}, - {mmGB_TILE_MODE12, false}, - {mmGB_TILE_MODE13, false}, - {mmGB_TILE_MODE14, false}, - {mmGB_TILE_MODE15, false}, - {mmGB_TILE_MODE16, false}, - {mmGB_TILE_MODE17, false}, - {mmGB_TILE_MODE18, false}, - {mmGB_TILE_MODE19, false}, - {mmGB_TILE_MODE20, false}, - {mmGB_TILE_MODE21, false}, - {mmGB_TILE_MODE22, false}, - {mmGB_TILE_MODE23, false}, - {mmGB_TILE_MODE24, false}, - {mmGB_TILE_MODE25, false}, - {mmGB_TILE_MODE26, false}, - {mmGB_TILE_MODE27, false}, - {mmGB_TILE_MODE28, false}, - {mmGB_TILE_MODE29, false}, - {mmGB_TILE_MODE30, false}, - {mmGB_TILE_MODE31, false}, - {mmGB_MACROTILE_MODE0, false}, - {mmGB_MACROTILE_MODE1, false}, - {mmGB_MACROTILE_MODE2, false}, - {mmGB_MACROTILE_MODE3, false}, - {mmGB_MACROTILE_MODE4, false}, - {mmGB_MACROTILE_MODE5, false}, - {mmGB_MACROTILE_MODE6, false}, - {mmGB_MACROTILE_MODE7, false}, - {mmGB_MACROTILE_MODE8, false}, - {mmGB_MACROTILE_MODE9, false}, - {mmGB_MACROTILE_MODE10, false}, - {mmGB_MACROTILE_MODE11, false}, - {mmGB_MACROTILE_MODE12, false}, - {mmGB_MACROTILE_MODE13, false}, - {mmGB_MACROTILE_MODE14, false}, - {mmGB_MACROTILE_MODE15, false}, - {mmCC_RB_BACKEND_DISABLE, false, true}, - {mmGC_USER_RB_BACKEND_DISABLE, false, true}, - {mmGB_BACKEND_MAP, false, false}, - {mmPA_SC_RASTER_CONFIG, false, true}, - {mmPA_SC_RASTER_CONFIG_1, false, true}, + {mmGRBM_STATUS}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, + {mmGB_TILE_MODE0}, + {mmGB_TILE_MODE1}, + {mmGB_TILE_MODE2}, + {mmGB_TILE_MODE3}, + {mmGB_TILE_MODE4}, + {mmGB_TILE_MODE5}, + {mmGB_TILE_MODE6}, + {mmGB_TILE_MODE7}, + {mmGB_TILE_MODE8}, + {mmGB_TILE_MODE9}, + {mmGB_TILE_MODE10}, + {mmGB_TILE_MODE11}, + {mmGB_TILE_MODE12}, + {mmGB_TILE_MODE13}, + {mmGB_TILE_MODE14}, + {mmGB_TILE_MODE15}, + {mmGB_TILE_MODE16}, + {mmGB_TILE_MODE17}, + {mmGB_TILE_MODE18}, + {mmGB_TILE_MODE19}, + {mmGB_TILE_MODE20}, + {mmGB_TILE_MODE21}, + {mmGB_TILE_MODE22}, + {mmGB_TILE_MODE23}, + {mmGB_TILE_MODE24}, + {mmGB_TILE_MODE25}, + {mmGB_TILE_MODE26}, + {mmGB_TILE_MODE27}, + {mmGB_TILE_MODE28}, + {mmGB_TILE_MODE29}, + {mmGB_TILE_MODE30}, + {mmGB_TILE_MODE31}, + {mmGB_MACROTILE_MODE0}, + {mmGB_MACROTILE_MODE1}, + {mmGB_MACROTILE_MODE2}, + {mmGB_MACROTILE_MODE3}, + {mmGB_MACROTILE_MODE4}, + {mmGB_MACROTILE_MODE5}, + {mmGB_MACROTILE_MODE6}, + {mmGB_MACROTILE_MODE7}, + {mmGB_MACROTILE_MODE8}, + {mmGB_MACROTILE_MODE9}, + {mmGB_MACROTILE_MODE10}, + {mmGB_MACROTILE_MODE11}, + {mmGB_MACROTILE_MODE12}, + {mmGB_MACROTILE_MODE13}, + {mmGB_MACROTILE_MODE14}, + {mmGB_MACROTILE_MODE15}, + {mmCC_RB_BACKEND_DISABLE, true}, + {mmGC_USER_RB_BACKEND_DISABLE, true}, + {mmGB_BACKEND_MAP, false}, + {mmPA_SC_RASTER_CONFIG, true}, + {mmPA_SC_RASTER_CONFIG_1, true}, }; static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, @@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - if (!cik_allowed_read_registers[i].untouched) - *value = cik_allowed_read_registers[i].grbm_indexed ? 
- cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_allowed_read_registers[i].grbm_indexed ? + cik_read_indexed_register(adev, se_num, + sh_num, reg_offset) : + RREG32(reg_offset); return 0; } return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index c57c3f18af01..b8918432c572 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index a5f294ebff5c..0c1209cdd1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5dffa27afa45..9f78c03a2e31 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 47bbc87f96d2..4bcf01dc567a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index d8c9a959493e..fd134a4629d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -118,14 +118,27 @@ static const struct { static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, u32 block_offset, u32 reg) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n"); - return 0; + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + + return r; } static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, u32 block_offset, u32 reg, u32 v) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n"); + unsigned long flags; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, + reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) @@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) { - int num_crtc = 0; - switch (adev->asic_type) { case CHIP_TAHITI: case CHIP_PITCAIRN: case CHIP_VERDE: - num_crtc = 6; - break; + return 6; case CHIP_OLAND: - num_crtc = 2; - break; + return 2; default: - num_crtc = 0; + return 0; } - return num_crtc; } void dce_v6_0_disable_dce(struct amdgpu_device *adev) @@ -1225,17 +1233,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads); } } -/* + static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev) { int i; - u32 offset, tmp; + u32 tmp; for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - offset = adev->mode_info.audio.pin[i].offset; - tmp = RREG32_AUDIO_ENDPT(offset, - AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); - if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) + tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); + if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT, + PORT_CONNECTIVITY)) adev->mode_info.audio.pin[i].connected = false; else adev->mode_info.audio.pin[i].connected = true; @@ -1257,45 +1265,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade return NULL; } -static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder) +static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) { struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 offset; if (!dig || !dig->afmt || !dig->afmt->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); - + WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, + dig->afmt->pin->id)); } static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n"); + struct 
amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int interlace = 0; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + interlace = 1; + + if (connector->latency_present[interlace]) { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, connector->video_latency[interlace]); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, connector->audio_latency[interlace]); + } else { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, 0); + } + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + u8 *sadb = NULL; + int sad_count; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + sad_count = 0; + } + + /* program the speaker allocation */ + tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 0); + + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 1); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 1); + + if (sad_count) + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, sadb[0]); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, 5); /* stereo */ + + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); } static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct 
drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + struct cea_sad *sads; + int i, sad_count; + + static const u16 eld_reg_to_type[][2] = { + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + if (sad_count <= 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 tmp = 0; + u8 stereo_freqs = 0; + int max_channels = -1; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + if (sad->channels > max_channels) { + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + MAX_CHANNELS, sad->channels); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + DESCRIPTOR_BYTE_2, sad->byte2); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES, sad->freq); + max_channels = sad->channels; + } + + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + stereo_freqs |= sad->freq; + else + break; + } + } + + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + } + + kfree(sads); } -*/ + static void dce_v6_0_audio_enable(struct amdgpu_device *adev, struct amdgpu_audio_pin *pin, bool enable) { - DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n"); + if (!pin) + return; + + WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + enable ? 
AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); } static const u32 pin_offsets[7] = @@ -1311,42 +1480,372 @@ static const u32 pin_offsets[7] = static int dce_v6_0_audio_init(struct amdgpu_device *adev) { + int i; + + if (!amdgpu_audio) + return 0; + + adev->mode_info.audio.enabled = true; + + switch (adev->asic_type) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + default: + adev->mode_info.audio.num_pins = 6; + break; + case CHIP_OLAND: + adev->mode_info.audio.num_pins = 2; + break; + } + + for (i = 0; i < adev->mode_info.audio.num_pins; i++) { + adev->mode_info.audio.pin[i].channels = -1; + adev->mode_info.audio.pin[i].rate = -1; + adev->mode_info.audio.pin[i].bits_per_sample = -1; + adev->mode_info.audio.pin[i].status_bits = 0; + adev->mode_info.audio.pin[i].category_code = 0; + adev->mode_info.audio.pin[i].connected = false; + adev->mode_info.audio.pin[i].offset = pin_offsets[i]; + adev->mode_info.audio.pin[i].id = i; + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + } + return 0; } static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { + int i; + if (!amdgpu_audio) + return; + + if (!adev->mode_info.audio.enabled) + return; + + for (i = 0; i < adev->mode_info.audio.num_pins; i++) + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + + adev->mode_info.audio.enabled = false; } -/* -static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) +static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); + WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); } -*/ -/* - * build a HDMI Video Info Frame - */ -/* -static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, - void *buffer, size_t size) + +static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder, + uint32_t clock, int bpc) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, + bpc > 8 ? 
0 : 1); + WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); + WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); + WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); + WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); + WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); + WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); + WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, + struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct hdmi_avi_infoframe frame; + u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; + uint8_t *payload = buffer + 3; + uint8_t *header = buffer; + ssize_t err; + u32 tmp; + + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + if (err < 0) { + DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); + return; + } + + err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); + if (err < 0) { + DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); + return; + } + + WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, + payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24)); + WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, + payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24)); + WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, + payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24)); + WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, + payload[0xC] | (payload[0xD] << 8) | (header[1] << 24)); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + /* anything other than 0 */ + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, + HDMI_AUDIO_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); } static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) { - DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + u32 tmp; + + /* + * Two dtos: generally use dto0 for hdmi, dto1 for dp. + * Express [24MHz / target pixel clock] as an exact rational + * number (coefficient of two integer numbers. 
DCCG_AUDIO_DTOx_PHASE + * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator + */ + tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id); + if (em == ATOM_ENCODER_MODE_HDMI) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 0); + } else if (ENCODER_MODE_IS_DP(em)) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 1); + } + WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); + if (em == ATOM_ENCODER_MODE_HDMI) { + WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock); + } else if (ENCODER_MODE_IS_DP(em)) { + WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock); + } } -*/ -/* - * update the info frames with the data from the current display mode - */ + +static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); + WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); + WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); + WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); + WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_GC + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 
1 : 0); + WREG32(mmHDMI_GC + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } else { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } +} + +static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1); + WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp); + } else { + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0); + } +} + static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int 
em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + int bpc = 8; + + if (!dig || !dig->afmt) + return; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (!dig->afmt->enabled) + return; + + dig->afmt->pin = dce_v6_0_audio_get_pin(adev); + if (!dig->afmt->pin) + return; + + if (encoder->crtc) { + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + bpc = amdgpu_crtc->bpc; + } + + /* disable audio before setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, false); + + dce_v6_0_audio_set_mute(encoder, true); + dce_v6_0_audio_write_speaker_allocation(encoder); + dce_v6_0_audio_write_sad_regs(encoder); + dce_v6_0_audio_write_latency_fields(encoder, mode); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_set_dto(encoder, mode->clock); + dce_v6_0_audio_set_vbi_packet(encoder); + dce_v6_0_audio_set_acr(encoder, mode->clock, bpc); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10); + } + dce_v6_0_audio_set_packet(encoder); + dce_v6_0_audio_select_pin(encoder); + dce_v6_0_audio_set_avi_infoframe(encoder, mode); + dce_v6_0_audio_set_mute(encoder, false); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_hdmi_enable(encoder, 1); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_dp_enable(encoder, 1); + } + + /* enable audio after setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, true); } static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) @@ -1362,6 +1861,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) /* Silent, r600_hdmi_enable will raise WARN for us */ if (enable && dig->afmt->enabled) return; + if (!enable && !dig->afmt->enabled) return; @@ -2756,6 +3256,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_encoder->pixel_clock = adjusted_mode->clock; @@ -2765,7 +3266,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, /* set scaler clears this on some chips */ dce_v6_0_set_interleave(encoder->crtc, mode); - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) { dce_v6_0_afmt_enable(encoder, true); dce_v6_0_afmt_setmode(encoder, adjusted_mode); } @@ -2827,11 +3328,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); if (amdgpu_atombios_encoder_is_digital(encoder)) { - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) dce_v6_0_afmt_enable(encoder, false); dig = amdgpu_encoder->enc_priv; dig->dig_encoder = -1; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index db30c6ba563a..a9e869554627 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index f1b479b6ac98..90bb08309a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index a125f9d44577..5173ca1fd159 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -393,8 +393,11 @@ out: static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) { - const u32 num_tile_mode_states = 32; - u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); + u32 reg_offset, split_equal_to_row_size, *tilemode; + + memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array)); + tilemode = adev->gfx.config.tile_mode_array; switch (adev->gfx.config.mem_row_size_in_kb) { case 1: @@ -410,887 +413,680 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VERDE) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - 
ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - 
TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 
- PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else if (adev->asic_type == CHIP_OLAND || - adev->asic_type == CHIP_HAINAN) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - 
NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - 
PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - 
PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + 
PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + 
tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + 
NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + 
ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) 
| + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK) | - 
TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - 
case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - 
ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else{ - + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 
+ NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else { DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); } - } static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, @@ -1318,11 +1114,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v6_0_create_bitmask(u32 bit_width) -{ - return (u32)(((u64)1 << bit_width) - 1); -} - static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -1332,8 +1123,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/ - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/ + adev->gfx.config.max_sh_per_se); return ~data & mask; } @@ -1399,11 +1190,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; - if (!se_mask[idx]) { + if (!se_mask[idx]) raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } } pkr0_mask &= rb_mask; @@ -1411,11 +1201,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; - if (!pkr0_mask) { + if (!pkr0_mask) raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } } if (rb_per_se >= 2) { @@ -1427,13 +1216,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } } if (rb_per_se > 2) { @@ -1444,13 +1232,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; 
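Aside: the per-IP gfx_v6_0_create_bitmask() copies are dropped here in favor of a shared amdgpu_gfx_create_bitmask(). A minimal standalone sketch of the semantics the call sites rely on — the real helper lives elsewhere in amdgpu; this userspace copy and its numbers are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* A mask with the low bit_width bits set; 1ULL avoids UB at width 32. */
static uint32_t create_bitmask(uint32_t bit_width)
{
        return (uint32_t)((1ULL << bit_width) - 1);
}

int main(void)
{
        /* Hypothetical config: 4 backends per SE, 2 SHs per SE. */
        uint32_t disabled = 0x2;        /* BACKEND_DISABLE field, say RB1 off */
        uint32_t mask = create_bitmask(4 / 2);

        /* gfx_v6_0_get_rb_active_bitmap() returns ~disabled & mask. */
        printf("active rb bitmap: 0x%x\n", ~disabled & mask);   /* 0x1 */
        return 0;
}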
- } } } } @@ -1479,8 +1266,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v6_0_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); + active_rbs |= data << + ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); } } gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1494,13 +1282,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_raster_config(adev, &raster_config); if (!adev->gfx.config.backend_enable_mask || - adev->gfx.config.num_rbs >= num_rb_pipes) { + adev->gfx.config.num_rbs >= num_rb_pipes) WREG32(mmPA_SC_RASTER_CONFIG, raster_config); - } else { + else gfx_v6_0_write_harvested_raster_configs(adev, raster_config, adev->gfx.config.backend_enable_mask, num_rb_pipes); - } /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -1517,11 +1304,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } -/* -static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev) -{ -} -*/ static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, u32 bitmap) @@ -1544,7 +1326,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -1688,7 +1470,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); - mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; @@ -3719,6 +3502,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -3737,16 +3526,18 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v6_0_get_cu_enabled(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee2f2139e2eb..37b45e4403d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -21,12 +21,13 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include 
"amdgpu_gfx.h" #include "cikd.h" #include "cik.h" +#include "cik_structs.h" #include "atom.h" #include "amdgpu_ucode.h" #include "clearstate_ci.h" @@ -48,7 +49,7 @@ #include "oss/oss_2_0_sh_mask.h" #define GFX7_NUM_GFX_RINGS 1 -#define GFX7_NUM_COMPUTE_RINGS 8 +#define GFX7_MEC_HPD_SIZE 2048 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, } /** - * gfx_v7_0_create_bitmask - create a bitmask - * - * @bit_width: length of the mask - * - * create a variable length bit mask (CIK). - * Returns the bitmask. - */ -static u32 gfx_v7_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - -/** * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs * * @adev: amdgpu_device pointer @@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) /** * gmc_v7_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) } } -#define MEC_HPD_SIZE 2048 - static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total - * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total - * Nonetheless, we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + /* allocate space for ALL pipes (even the ones we don't own) */ + mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec + * GFX7_MEC_HPD_SIZE * 2; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) } /* clear memory. 
Not sure if this is required or not */ - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -2917,275 +2902,296 @@ struct hqd_registers u32 cp_mqd_control; }; -struct bonaire_mqd +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, + int mec, int pipe) { - u32 header; - u32 dispatch_initiator; - u32 dimensions[3]; - u32 start_idx[3]; - u32 num_threads[3]; - u32 pipeline_stat_enable; - u32 perf_counter_enable; - u32 pgm[2]; - u32 tba[2]; - u32 tma[2]; - u32 pgm_rsrc[2]; - u32 vmid; - u32 resource_limits; - u32 static_thread_mgmt01[2]; - u32 tmp_ring_size; - u32 static_thread_mgmt23[2]; - u32 restart[3]; - u32 thread_trace_enable; - u32 reserved1; - u32 user_data[16]; - u32 vgtcs_invoke_count[2]; - struct hqd_registers queue_state; - u32 dequeue_cntr; - u32 interrupt_queue[64]; -}; - -/** - * gfx_v7_0_cp_compute_resume - setup the compute queue registers - * - * @adev: amdgpu_device pointer - * - * Program the compute queues and test them to make sure they - * are working. - * Returns 0 for success, error for failure. - */ -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct bonaire_mqd *mqd; - struct amdgpu_ring *ring; - - /* fix up chicken bits */ - tmp = RREG32(mmCP_CPF_DEBUG); - tmp |= (1 << 23); - WREG32(mmCP_CPF_DEBUG, tmp); + u32 tmp; + size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe) + * GFX7_MEC_HPD_SIZE * 2; - /* init the pipes */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); + cik_srbm_select(adev, mec + 1, pipe, 0, 0); - cik_srbm_select(adev, me, pipe, 0, 0); + /* write the EOP addr */ + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); - /* write the EOP addr */ - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); + /* set the VMID assigned */ + WREG32(mmCP_HPD_EOP_VMID, 0); - /* set the VMID assigned */ - WREG32(mmCP_HPD_EOP_VMID, 0); + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(mmCP_HPD_EOP_CONTROL); + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); + WREG32(mmCP_HPD_EOP_CONTROL, tmp); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HPD_EOP_CONTROL); - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; - tmp |= order_base_2(MEC_HPD_SIZE / 8); - WREG32(mmCP_HPD_EOP_CONTROL, tmp); - } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); +} - /* init the queues. Just two for now. 
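Aside: gfx_v7_0_mec_init() now sizes the HPD EOP buffer for every pipe, owned or not, and gfx_v7_0_compute_pipe_init() finds a pipe's slice by linear index. A standalone sketch of that arithmetic — the 2048-byte HPD size and the *2 factor mirror the patch; the topology values are made up:

#include <stddef.h>
#include <stdio.h>

#define MEC_HPD_SIZE 2048       /* GFX7_MEC_HPD_SIZE in the patch */

int main(void)
{
        int num_mec = 2, num_pipe_per_mec = 4;  /* KAVERI-style topology */
        size_t bo_size = (size_t)num_mec * num_pipe_per_mec * MEC_HPD_SIZE * 2;
        int mec, pipe;

        for (mec = 0; mec < num_mec; mec++)
                for (pipe = 0; pipe < num_pipe_per_mec; pipe++)
                        printf("mec %d pipe %d -> eop offset %zu\n", mec, pipe,
                               (size_t)(mec * num_pipe_per_mec + pipe) *
                               MEC_HPD_SIZE * 2);
        printf("hpd_eop bo size: %zu bytes\n", bo_size);
        return 0;
}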
*/ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) +{ + int i; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct bonaire_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } + if (i == adev->usec_timeout) + return -ETIMEDOUT; - /* init the mqd struct */ - memset(buf, 0, sizeof(struct bonaire_mqd)); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + } - mqd = (struct bonaire_mqd *)buf; - mqd->header = 0xC0310800; - mqd->static_thread_mgmt01[0] = 0xffffffff; - mqd->static_thread_mgmt01[1] = 0xffffffff; - mqd->static_thread_mgmt23[0] = 0xffffffff; - mqd->static_thread_mgmt23[1] = 0xffffffff; + return 0; +} - mutex_lock(&adev->srbm_mutex); - cik_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, + struct cik_mqd *mqd, + uint64_t mqd_gpu_addr, + struct amdgpu_ring *ring) +{ + u64 hqd_gpu_addr; + u64 wb_gpu_addr; - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct cik_mqd)); - /* enable doorbell? 
*/ - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - else - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); - - /* disable the queue if it's active */ - mqd->queue_state.cp_hqd_dequeue_request = 0; - mqd->queue_state.cp_hqd_pq_rptr = 0; - mqd->queue_state.cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - } + mqd->header = 0xC0310800; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - /* set the pointer to the MQD */ - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); - /* set MQD vmid to 0 */ - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); - - mqd->queue_state.cp_hqd_pq_control |= - order_base_2(ring->ring_size / 8); - mqd->queue_state.cp_hqd_pq_control |= - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); + /* enable doorbell? 
*/ + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + if (ring->use_doorbell) + mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + else + mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + + /* set MQD vmid to 0 */ + mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL); + mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); + + mqd->cp_hqd_pq_control |= + order_base_2(ring->ring_size / 8); + mqd->cp_hqd_pq_control |= + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); #ifdef __BIG_ENDIAN - mqd->queue_state.cp_hqd_pq_control |= - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; + mqd->cp_hqd_pq_control |= + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; #endif - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); - mqd->queue_state.cp_hqd_pq_control |= - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->queue_state.cp_hqd_pq_rptr_report_addr); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control |= - (ring->doorbell_index << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); - mqd->queue_state.cp_hqd_pq_doorbell_control |= - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + mqd->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - } else { - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; + /* only used 
if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set the wb address wether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + mqd->cp_hqd_pq_doorbell_control &= + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; + mqd->cp_hqd_pq_doorbell_control |= + (ring->doorbell_index << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); + mqd->cp_hqd_pq_doorbell_control |= + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + mqd->cp_hqd_pq_doorbell_control &= + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + + } else { + mqd->cp_hqd_pq_doorbell_control = 0; + } + + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + /* defaults */ + mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL); + mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR); + mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI); + mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR); + mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE); + mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD); + mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE); + mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO); + mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI); + mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO); + mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR); + + /* activate the queue */ + mqd->cp_hqd_active = 1; +} + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +{ + uint32_t tmp; + uint32_t mqd_reg; + uint32_t *mqd_data; + + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */ + mqd_data = &mqd->cp_mqd_base_addr_lo; + + /* disable wptr polling */ + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + return 0; +} + +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) +{ + int r; + u64 mqd_gpu_addr; + struct cik_mqd *mqd; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct cik_mqd), + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + 
&ring->mqd_obj); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto out; - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + goto out_unreserve; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + goto out_unreserve; + } - /* set the vmid for the queue */ - mqd->queue_state.cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); + mutex_lock(&adev->srbm_mutex); + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - /* activate the queue */ - mqd->queue_state.cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); + gfx_v7_0_mqd_deactivate(adev); + gfx_v7_0_mqd_commit(adev, mqd); - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); + amdgpu_bo_kunmap(ring->mqd_obj); +out_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +out: + return 0; +} - ring->ready = true; +/** + * gfx_v7_0_cp_compute_resume - setup the compute queue registers + * + * @adev: amdgpu_device pointer + * + * Program the compute queues and test them to make sure they + * are working. + * Returns 0 for success, error for failure. 
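Aside: gfx_v7_0_mqd_commit() above works because the cik_mqd fields from cp_mqd_base_addr_lo onward mirror the HQD register block one-for-one, so a register number indexes straight into the struct and one loop programs the whole window. A toy model of that trick — register numbers, fields, and the wreg32() stub are all invented for illustration:

#include <stdint.h>
#include <stdio.h>

enum { REG_BASE = 100, REG_VMID = 101, REG_ACTIVE = 103 };      /* fake map */

struct toy_mqd {
        uint32_t base;          /* image of register 100 */
        uint32_t vmid;          /* image of register 101 */
        uint32_t pq_ctl;        /* image of register 102 */
        uint32_t active;        /* image of register 103 */
};

static void wreg32(unsigned reg, unsigned val)
{
        printf("WREG32(%u, 0x%x)\n", reg, val); /* stand-in for MMIO */
}

int main(void)
{
        struct toy_mqd mqd = { 0x1000, 0, 0x55, 1 };
        uint32_t *data = &mqd.base;     /* register image, in block order */
        unsigned reg;

        /* program the window in order; ACTIVE last so the queue starts armed */
        for (reg = REG_BASE; reg <= REG_ACTIVE; reg++)
                wreg32(reg, data[reg - REG_BASE]);
        return 0;
}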
+ */ +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int r, i, j; + u32 tmp; + struct amdgpu_ring *ring; + + /* fix up chicken bits */ + tmp = RREG32(mmCP_CPF_DEBUG); + tmp |= (1 << 23); + WREG32(mmCP_CPF_DEBUG, tmp); + + /* init all pipes (even the ones we don't own) */ + for (i = 0; i < adev->gfx.mec.num_mec; i++) + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) + gfx_v7_0_compute_pipe_init(adev, i, j); + + /* init the queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + r = gfx_v7_0_compute_queue_init(adev, i); + if (r) { + gfx_v7_0_cp_compute_fini(adev); + return r; + } } gfx_v7_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - + ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; @@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) gfx_v7_0_update_rlc(adev, tmp); data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (orig != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); + } else { gfx_v7_0_enable_gui_idle_interrupt(adev, false); @@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) RREG32(mmCB_CGTT_SCLK_CTRL); data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); - } - - if (orig != data) - WREG32(mmRLC_CGCG_CGLS_CTRL, data); + if (orig != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); + gfx_v7_0_enable_gui_idle_interrupt(adev, true); + } } static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) @@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return (~data) & mask; } @@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v7_0_gfx_funcs; adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; gfx_v7_0_set_ring_funcs(adev); @@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config = gb_addr_config; } +static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, r; + int i, j, k, r, ring_id; + + switch (adev->asic_type) { + case CHIP_KAVERI: + 
adev->gfx.mec.num_mec = 2; + break; + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + default: + adev->gfx.mec.num_mec = 1; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); @@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v7_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); - if (r) - return r; } /* reserve GDS, GWS and OA resource for gfx */ @@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, u32 mec_int_cntl, mec_int_cntl_reg; /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. 
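Aside: queue creation in sw_init now walks mec -> queue -> pipe ("horizontally across pipes"), so consecutive rings land on different pipes, and it skips queues the driver does not own. A toy enumeration of the resulting ring naming — the ownership test is stubbed here; the real check is amdgpu_gfx_is_mec_queue_enabled(), and the driver additionally caps at num_compute_rings:

#include <stdbool.h>
#include <stdio.h>

static bool queue_owned(int mec, int pipe, int queue)
{
        return mec == 0;        /* stub; real code reads mec.queue_bitmap */
}

int main(void)
{
        int num_mec = 2, pipes = 4, queues = 8, ring_id = 0;
        int mec, q, pipe;

        for (mec = 0; mec < num_mec; mec++)
                for (q = 0; q < queues; q++)
                        for (pipe = 0; pipe < pipes; pipe++) {
                                if (!queue_owned(mec, pipe, q))
                                        continue;
                                /* mec0 is me1, hence the +1 in ring->me */
                                printf("ring %2d = comp_%d.%d.%d\n",
                                       ring_id++, mec + 1, pipe, q);
                        }
        return 0;
}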
*/ @@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, case 0: mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; + break; default: DRM_DEBUG("invalid pipe %d\n", pipe); return; @@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -5354,16 +5418,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v7_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 2f5164cc0e53..6fb9c1524691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; +struct amdgpu_device; +struct cik_mqd; + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 758d636a6f52..aa5a50f5eac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "vi.h" @@ -40,7 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" @@ -52,7 +51,7 @@ #include "smu/smu_7_1_3_d.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_NUM_COMPUTE_RINGS 8 +#define GFX8_MEC_HPD_SIZE 2048 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -657,10 +656,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -859,7 
+856,8 @@ err1: } -static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) +{ release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -941,12 +939,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far - * TODO: when ucod ready we should use ucode version to judge if - * chain-ib support or not. - */ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +952,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* + * Support for MCBP/Virtualization in combination with chained IBs is + * formal released on feature version #46 + */ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) @@ -1373,64 +1376,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) } } -static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; - - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); - - return r; -} -static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); -} - -#define MEC_HPD_SIZE 2048 - static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + + mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -1459,7 +1420,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return r; } - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); + 
memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -1467,38 +1428,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); -} - -static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) -{ - int r; - u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); - return r; - } - - memset(hpd, 0, MEC_HPD_SIZE); - - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); - - return 0; -} - static const u32 vgpr_init_compute_shader[] = { 0x7e000209, 0x7e020208, @@ -1907,46 +1836,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 2; - - switch (adev->pdev->revision) { - case 0xc4: - case 0x84: - case 0xc8: - case 0xcc: - case 0xe1: - case 0xe3: - /* B10 */ - adev->gfx.config.max_cu_per_sh = 8; - break; - case 0xc5: - case 0x81: - case 0x85: - case 0xc9: - case 0xcd: - case 0xe2: - case 0xe4: - /* B8 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc6: - case 0xca: - case 0xce: - case 0x88: - case 0xe6: - /* B6 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc7: - case 0x87: - case 0xcb: - case 0xe5: - case 0x89: - default: - /* B4 */ - adev->gfx.config.max_cu_per_sh = 4; - break; - } - + adev->gfx.config.max_cu_per_sh = 8; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; @@ -1963,35 +1853,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 1; - - switch (adev->pdev->revision) { - case 0x80: - case 0x81: - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc4: - case 0xc8: - case 0xc9: - case 0xd6: - case 0xda: - case 0xe9: - case 0xea: - adev->gfx.config.max_cu_per_sh = 3; - break; - case 0x83: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd4: - case 0xdb: - case 0xe1: - case 0xe2: - default: - adev->gfx.config.max_cu_per_sh = 2; - break; - } - + adev->gfx.config.max_cu_per_sh = 3; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 16; @@ -2083,13 +1945,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX8_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = 
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v8_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_POLARIS10: + case CHIP_CARRIZO: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_TOPAZ: + case CHIP_STONEY: + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); if (r) @@ -2151,49 +2067,41 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v8_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v8_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as well as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -2237,11 +2145,9 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - gfx_v8_0_compute_mqd_sw_fini(adev); - gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); - } + 
amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); @@ -3594,11 +3500,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v8_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -3608,8 +3509,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -3823,7 +3724,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /** * gfx_v8_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -4481,6 +4382,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) return 0; } +static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + u32 tmp; + /* no gfx doorbells on iceland */ + if (adev->asic_type == CHIP_TOPAZ) + return; + + tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + + WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); + + if (adev->flags & AMD_IS_APU) + return; + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_GFX_RING0); + WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) { @@ -4528,34 +4462,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32(mmCP_RB0_BASE, rb_addr); WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); - /* no gfx doorbells on iceland */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); - } - WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); - - if (adev->asic_type == CHIP_TONGA) { - tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); - WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); - } - - } - + gfx_v8_0_set_cpg_door_bell(adev, ring); /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); @@ -4628,29 +4535,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < 
adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - ring->mqd_ptr = NULL; - ring->mqd_gpu_addr = 0; - } - } -} - /* KIQ functions */ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) { @@ -4666,45 +4550,111 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) WREG32(mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) +static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + /* map queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); + amdgpu_ring_commit(kiq_ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } -static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) { - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; + int i, r = 0; - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + if (i == adev->usec_timeout) + r = -ETIMEDOUT; + } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, 0x21010000); - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | - (ring->queue << 26) | - (ring->pipe << 29) | - ((ring->me == 1 ? 
0 : 1) << 31)); /* doorbell */ - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - amdgpu_ring_commit(kiq_ring); - udelay(50); + return r; } static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) @@ -4721,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - + if (!(adev->flags & AMD_IS_APU)) { + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + } eop_base_addr = ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4729,7 +4684,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -4741,11 +4696,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control = tmp; - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr = 0; - /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); @@ -4815,149 +4765,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; + /* set MTYPE */ + tmp = RREG32(mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); + mqd->cp_hqd_ib_control = tmp; + + tmp = RREG32(mmCP_HQD_IQ_TIMER); + tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); + mqd->cp_hqd_iq_timer = tmp; + + tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); + mqd->cp_hqd_ctx_save_control = tmp; + + /* defaults */ + mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); + mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); + mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); + mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); + mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); + mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); + mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); + mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); + mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); + mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); + mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); + /* activate the queue */ mqd->cp_hqd_active = 1; return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) +int gfx_v8_0_mqd_commit(struct 
@@ -4721,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
-
+	if (!(adev->flags & AMD_IS_APU)) {
+		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr +
+			offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr +
+			offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+	}
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4729,7 +4684,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
@@ -4741,11 +4696,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
-	/* disable the queue if it's active */
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr = 0;
-
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
@@ -4815,149 +4765,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
+	/* set MTYPE */
+	tmp = RREG32(mmCP_HQD_IB_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
+	tmp = RREG32(mmCP_HQD_IQ_TIMER);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
+	mqd->cp_hqd_iq_timer = tmp;
+
+	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ctx_save_control = tmp;
+
+	/* defaults */
+	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
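The first mqd_init hunk stores, inside the MQD itself, the GPU address of the dynamic CU mask that trails the MQD image in struct vi_mqd_allocation (the field is spelled "dyamic_cu_mask" in this series), split into lower/upper 32-bit halves. A compileable illustration of the offsetof arithmetic, using a simplified stand-in layout rather than the real vi_mqd_allocation:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct mqd_alloc {				/* simplified vi_mqd_allocation */
	uint32_t mqd_image[128];		/* placeholder for struct vi_mqd */
	uint32_t dyamic_cu_mask;		/* spelling as in the series */
	uint32_t dyamic_rb_mask;
};

int main(void)
{
	uint64_t mqd_gpu_addr = 0x100001000ULL;	/* hypothetical GPU VA */
	uint64_t mask_addr = mqd_gpu_addr +
			     offsetof(struct mqd_alloc, dyamic_cu_mask);

	/* what the hunk stores in dynamic_cu_mask_addr_lo/_hi */
	printf("lo=0x%08x hi=0x%08x\n",
	       (unsigned)(mask_addr & 0xffffffffu),
	       (unsigned)(mask_addr >> 32));
	return 0;
}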
-static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+			struct vi_mqd *mqd)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct vi_mqd *mqd = ring->mqd_ptr;
-	int j;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (adev->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
 	}
 
-	/* set the pointer to the MQD */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-	/* set MQD vmid to 0 */
-	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
+	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
-	/* set the wb address whether it's enabled or not */
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-	       mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-	       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		if ((adev->asic_type == CHIP_CARRIZO) ||
-		    (adev->asic_type == CHIP_FIJI) ||
-		    (adev->asic_type == CHIP_STONEY)) {
-			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-			       AMDGPU_DOORBELL_KIQ << 2);
-			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-			       AMDGPU_DOORBELL_MEC_RING7 << 2);
-		}
-	}
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
+	return 0;
+}
 
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
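gfx_v8_0_mqd_commit() above relies on two invariants: the CP_HQD register block is contiguous in the register map, and struct vi_mqd mirrors it field-for-field starting at cp_mqd_base_addr_lo, so a whole range can be programmed by indexing the MQD as an array; mmCP_HQD_ACTIVE is written last so the queue only goes live once everything else is in place. A standalone mock of the loop follows; the register numbering and mmio[] array are invented for illustration, not the VI register map.

#include <stdint.h>
#include <stdio.h>

enum {					/* invented register numbers */
	MQD_BASE_ADDR	= 100,
	HQD_VMID	= 102,
	HQD_EOP_CONTROL	= 110,
	NREGS		= 16,
};

static uint32_t mmio[128];		/* mock register file */

static void wreg32(uint32_t reg, uint32_t val)
{
	mmio[reg] = val;
}

/* mqd_data[0] corresponds to MQD_BASE_ADDR, just as &mqd->cp_mqd_base_addr_lo
 * does in the driver; programming a register range is then a single loop. */
static void commit_range(const uint32_t *mqd_data, uint32_t first, uint32_t last)
{
	uint32_t reg;

	for (reg = first; reg <= last; reg++)
		wreg32(reg, mqd_data[reg - MQD_BASE_ADDR]);
}

int main(void)
{
	uint32_t mqd_data[NREGS] = { [HQD_VMID - MQD_BASE_ADDR] = 8 };

	commit_range(mqd_data, HQD_VMID, HQD_EOP_CONTROL);
	printf("vmid reg = %u\n", mmio[HQD_VMID]);	/* prints 8 */
	return 0;
}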
+static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct vi_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
-	/* set the vmid for the queue */
-	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+	gfx_v8_0_kiq_setting(ring);
 
-	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+	if (adev->gfx.in_reset) { /* for GPU_RESET case */
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 
-	/* activate the queue */
-	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+	} else {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_init(ring);
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 
-	if (ring->use_doorbell)
-		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+	}
 
 	return 0;
 }
 
-static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct vi_mqd *mqd = ring->mqd_ptr;
-	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
-	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-	if (is_kiq) {
-		gfx_v8_0_kiq_setting(&kiq->ring);
-	} else {
-		mqd_idx = ring - &adev->gfx.compute_ring[0];
-	}
-
-	if (!adev->gfx.in_reset) {
-		memset((void *)mqd, 0, sizeof(*mqd));
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
-		if (is_kiq)
-			gfx_v8_0_kiq_init_register(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-	} else {	/* for GPU_RESET case */
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
-
-		if (is_kiq) {
-			mutex_lock(&adev->srbm_mutex);
-			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-			gfx_v8_0_kiq_init_register(ring);
-			vi_srbm_select(adev, 0, 0, 0, 0);
-			mutex_unlock(&adev->srbm_mutex);
-		}
+	} else {
+		amdgpu_ring_clear_ring(ring);
 	}
-
-	if (is_kiq)
-		gfx_v8_0_kiq_enable(ring);
-	else
-		gfx_v8_0_map_queue_enable(&kiq->ring, ring);
-
 	return 0;
 }
 
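Both init paths above share one backup/restore scheme: on first init the MQD is zeroed, filled by gfx_v8_0_mqd_init(), committed, and snapshotted into adev->gfx.mec.mqd_backup[]; on a GPU reset the snapshot is copied back and only the ring buffer is cleared, skipping the rebuild. A standalone mock of that control flow, with types and helpers that are illustrative only:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct mqd { unsigned int cu_mask, active; };	/* toy stand-in for vi_mqd */

static struct mqd backup;			/* adev->gfx.mec.mqd_backup[idx] */

static void init_queue(struct mqd *m, bool in_reset)
{
	if (in_reset) {
		*m = backup;			/* GPU_RESET: restore snapshot */
	} else {
		memset(m, 0, sizeof(*m));	/* first init: build from scratch */
		m->cu_mask = 0xFFFFFFFF;
		m->active = 1;
		backup = *m;			/* snapshot the known-good image */
	}
	/* the driver then does vi_srbm_select() + gfx_v8_0_mqd_commit() */
}

int main(void)
{
	struct mqd m;

	init_queue(&m, false);			/* normal bring-up */
	init_queue(&m, true);			/* simulated reset path */
	printf("active=%u cu_mask=0x%x\n", m.active, m.cu_mask);
	return 0;
}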
+static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
+{
+	if (adev->asic_type > CHIP_TONGA) {
+		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
+		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
+	}
+	/* enable doorbells */
+	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+}
+
 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = NULL;
@@ -4981,13 +4942,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	if (r)
 		goto done;
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
-		goto done;
-	}
-
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
@@ -4996,272 +4950,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 			goto done;
 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
 		if (!r) {
-			r = gfx_v8_0_kiq_init_queue(ring);
+			r = gfx_v8_0_kcq_init_queue(ring);
 			amdgpu_bo_kunmap(ring->mqd_obj);
 			ring->mqd_ptr = NULL;
 		}
 		amdgpu_bo_unreserve(ring->mqd_obj);
 		if (r)
 			goto done;
-
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
 	}
-done:
-	return r;
-}
-
-static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct vi_mqd *mqd;
-
-	/* init the queues. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct vi_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-					     NULL, &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
-		}
-
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct vi_mqd));
-
-		mqd = (struct vi_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->compute_pipelinestat_enable = 0x00000001;
-		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-		mqd->compute_misc_reserved = 0x00000003;
-
-		mutex_lock(&adev->srbm_mutex);
-		vi_srbm_select(adev, ring->me,
-			       ring->pipe,
-			       ring->queue, 0);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		eop_gpu_addr >>= 8;
-
-		/* write the EOP addr */
-		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
-		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HQD_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
-
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
-
-		mqd->cp_hqd_eop_base_addr_lo =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR);
-		mqd->cp_hqd_eop_base_addr_hi =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
-
-		/* enable doorbell? */
-		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		/* disable the queue if it's active */
-		mqd->cp_hqd_dequeue_request = 0;
-		mqd->cp_hqd_pq_rptr = 0;
-		mqd->cp_hqd_pq_wptr = 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		}
-
-		/* set the pointer to the MQD */
-		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-		/* set MQD vmid to 0 */
-		tmp = RREG32(mmCP_MQD_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-		WREG32(mmCP_MQD_CONTROL, tmp);
-		mqd->cp_mqd_control = tmp;
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-				    (order_base_2(ring->ring_size / 4) - 1));
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
-		mqd->cp_hqd_pq_control = tmp;
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->cp_hqd_pq_rptr_report_addr_lo);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo |