Merge branches 'x86-alternatives-for-linus', 'x86-fpu-for-linus', 'x86-hwmon-for...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 6 Jan 2011 19:11:50 +0000 (11:11 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 6 Jan 2011 19:11:50 +0000 (11:11 -0800)
* 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, suspend: Avoid unnecessary smp alternatives switch during suspend/resume

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64, asm: Use fxsaveq/fxrestorq in more places

* 'x86-hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, hwmon: Add core threshold notification to therm_throt.c

* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, paravirt: Use native_halt on a halt, not native_safe_halt

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  locking, lockdep: Convert sprintf_symbol to %pS

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  irq: Better struct irqaction layout

590 files changed:
CREDITS
Documentation/RCU/trace.txt
Documentation/accounting/getdelays.c
Documentation/dontdiff
Documentation/filesystems/Locking
Documentation/kernel-docs.txt
Documentation/kernel-parameters.txt
Documentation/scsi/scsi_mid_low_api.txt
Documentation/trace/events-power.txt [new file with mode: 0644]
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Documentation/x86/boot.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/perf_event.h
arch/alpha/kernel/irq_alpha.c
arch/alpha/kernel/perf_event.c
arch/arm/common/it8152.c
arch/arm/include/asm/hardware/it8152.h
arch/arm/include/asm/highmem.h
arch/arm/include/asm/sizes.h
arch/arm/include/asm/system.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/perf_event.c
arch/arm/kernel/smp.c
arch/arm/mach-at91/include/mach/at91_mci.h
arch/arm/mach-ixp4xx/common-pci.c
arch/arm/mach-pxa/Kconfig
arch/arm/mach-pxa/sleep.S
arch/arm/mm/cache-feroceon-l2.c
arch/arm/mm/cache-xsc3l2.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/flush.c
arch/arm/mm/highmem.c
arch/mips/kernel/perf_event_mipsxx.c
arch/mn10300/kernel/irq.c
arch/powerpc/kernel/e500-pmu.c
arch/powerpc/kernel/mpc7450-pmu.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/powerpc/kernel/power4-pmu.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
arch/powerpc/kernel/power6-pmu.c
arch/powerpc/kernel/power7-pmu.c
arch/powerpc/kernel/ppc970-pmu.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/s390/Kconfig
arch/s390/include/asm/mutex.h
arch/sh/boards/mach-se/7206/irq.c
arch/sh/kernel/cpu/sh2a/clock-sh7201.c
arch/sh/kernel/cpu/sh4/clock-sh4-202.c
arch/sh/kernel/cpu/sh4/perf_event.c
arch/sh/kernel/cpu/sh4a/perf_event.c
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/nmi.c
arch/sparc/kernel/perf_event.c
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/boot/compressed/head_64.S
arch/x86/include/asm/alternative.h
arch/x86/include/asm/amd_nb.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/bootparam.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/i387.h
arch/x86/include/asm/io_apic.h
arch/x86/include/asm/irq.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mce.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mpspec.h
arch/x86/include/asm/mpspec_def.h
arch/x86/include/asm/mrst-vrtc.h [new file with mode: 0644]
arch/x86/include/asm/mrst.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/pci.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/smpboot_hooks.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/timer.h
arch/x86/include/asm/uv/uv_bau.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apb_timer.c
arch/x86/kernel/aperture_64.c
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c [deleted file]
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/early_printk.c
arch/x86/kernel/early_printk_mrst.c [deleted file]
arch/x86/kernel/ftrace.c
arch/x86/kernel/head32.c
arch/x86/kernel/head_32.S
arch/x86/kernel/kprobes.c
arch/x86/kernel/microcode_amd.c
arch/x86/kernel/microcode_intel.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/reboot_fixups_32.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/time.c
arch/x86/kernel/trampoline_64.S
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/verify_cpu.S [new file with mode: 0644]
arch/x86/kernel/verify_cpu_64.S [deleted file]
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/i8259.c
arch/x86/kvm/mmu.c
arch/x86/mm/Makefile
arch/x86/mm/amdtopology_64.c [new file with mode: 0644]
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/k8topology_64.c [deleted file]
arch/x86/mm/kmemcheck/error.c
arch/x86/mm/numa_64.c
arch/x86/mm/pageattr.c
arch/x86/mm/setup_nx.c
arch/x86/mm/srat_32.c
arch/x86/mm/srat_64.c
arch/x86/oprofile/backtrace.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/nmi_timer_int.c
arch/x86/oprofile/op_model_amd.c
arch/x86/oprofile/op_model_p4.c
arch/x86/pci/Makefile
arch/x86/pci/ce4100.c [new file with mode: 0644]
arch/x86/pci/pcbios.c
arch/x86/platform/Makefile
arch/x86/platform/ce4100/Makefile [new file with mode: 0644]
arch/x86/platform/ce4100/ce4100.c [new file with mode: 0644]
arch/x86/platform/iris/Makefile [new file with mode: 0644]
arch/x86/platform/iris/iris.c [new file with mode: 0644]
arch/x86/platform/mrst/Makefile
arch/x86/platform/mrst/early_printk_mrst.c [new file with mode: 0644]
arch/x86/platform/mrst/mrst.c
arch/x86/platform/mrst/vrtc.c [new file with mode: 0644]
arch/x86/platform/sfi/sfi.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/platform/visws/visws_quirks.c
drivers/acpi/acpica/evgpeinit.c
drivers/acpi/acpica/nsinit.c
drivers/acpi/battery.c
drivers/acpi/numa.c
drivers/acpi/scan.c
drivers/ata/Kconfig
drivers/ata/Makefile
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-sff.c
drivers/ata/pata_cs5536.c
drivers/atm/atmtcp.c
drivers/bluetooth/hci_ldisc.c
drivers/char/agp/amd64-agp.c
drivers/char/agp/intel-gtt.c
drivers/char/ramoops.c
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/dma/mv_xor.c
drivers/edac/amd64_edac.c
drivers/gpio/cs5535-gpio.c
drivers/gpio/gpiolib.c
drivers/gpio/rdc321x-gpio.c
drivers/gpu/drm/drm_crtc_helper.c
drivers/gpu/drm/i915/dvo_ch7017.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_cs.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/hwmon/s3c-hwmon.c
drivers/idle/intel_idle.c
drivers/isdn/gigaset/capi.c
drivers/leds/led-class.c
drivers/media/IR/keymaps/rc-rc6-mce.c
drivers/media/IR/lirc_dev.c
drivers/media/IR/mceusb.c
drivers/media/IR/nuvoton-cir.c
drivers/media/IR/streamzap.c
drivers/media/video/cx25840/cx25840-core.c
drivers/media/video/cx88/cx88-alsa.c
drivers/media/video/cx88/cx88-cards.c
drivers/media/video/cx88/cx88-video.c
drivers/media/video/cx88/cx88.h
drivers/media/video/em28xx/em28xx-video.c
drivers/media/video/mx2_camera.c
drivers/media/video/s5p-fimc/fimc-capture.c
drivers/media/video/s5p-fimc/fimc-core.c
drivers/media/video/s5p-fimc/fimc-core.h
drivers/media/video/s5p-fimc/regs-fimc.h
drivers/media/video/sh_mobile_ceu_camera.c
drivers/media/video/soc_camera.c
drivers/media/video/wm8775.c
drivers/mfd/ab8500-core.c
drivers/mfd/wm831x-core.c
drivers/mmc/core/core.c
drivers/mmc/host/at91_mci.c
drivers/mmc/host/atmel-mci.c
drivers/net/atl1c/atl1c_main.c
drivers/net/atlx/atl1.c
drivers/net/benet/be.h
drivers/net/benet/be_cmds.c
drivers/net/benet/be_main.c
drivers/net/bonding/bond_ipv6.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bonding.h
drivers/net/cnic.c
drivers/net/ehea/ehea_ethtool.c
drivers/net/epic100.c
drivers/net/hamachi.c
drivers/net/pcmcia/axnet_cs.c
drivers/net/pcmcia/pcnet_cs.c
drivers/net/ppp_generic.c
drivers/net/skfp/skfddi.c
drivers/net/starfire.c
drivers/net/sundance.c
drivers/net/tehuti.c
drivers/net/tg3.c
drivers/net/typhoon.c
drivers/net/usb/asix.c
drivers/net/usb/mcs7830.c
drivers/net/veth.c
drivers/net/wireless/hostap/hostap_main.c
drivers/net/wireless/iwlwifi/iwl-1000.c
drivers/net/wireless/iwlwifi/iwl-6000.c
drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c
drivers/net/wireless/iwlwifi/iwl-agn-lib.c
drivers/net/wireless/iwlwifi/iwl-core.h
drivers/net/wireless/iwlwifi/iwl-eeprom.h
drivers/net/wireless/libertas/cfg.c
drivers/net/wireless/p54/p54usb.c
drivers/net/wireless/rt2x00/rt2800pci.c
drivers/net/wireless/rt2x00/rt2x00.h
drivers/net/wireless/rt2x00/rt2x00dev.c
drivers/net/yellowfin.c
drivers/of/of_i2c.c
drivers/pci/hotplug/pciehp_acpi.c
drivers/platform/x86/intel_ips.c
drivers/platform/x86/intel_ips.h [new file with mode: 0644]
drivers/platform/x86/intel_scu_ipc.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-mrst.c [new file with mode: 0644]
drivers/rtc/rtc-rs5c372.c
drivers/scsi/bfa/bfa_fcs.c
drivers/scsi/bfa/bfa_fcs_fcpim.c
drivers/scsi/bfa/bfa_fcs_lport.c
drivers/scsi/bfa/bfa_fcs_rport.c
drivers/scsi/bfa/bfa_ioc.c
drivers/scsi/bfa/bfa_svc.c
drivers/scsi/bfa/bfad.c
drivers/scsi/bfa/bfad_drv.h
drivers/scsi/bfa/bfad_im.c
drivers/sh/intc/core.c
drivers/spi/coldfire_qspi.c
drivers/spi/mpc52xx_spi.c
drivers/spi/omap2_mcspi.c
drivers/spi/spi.c
drivers/spi/spi_fsl_espi.c
drivers/staging/zram/zram_drv.c
drivers/usb/atm/ueagle-atm.c
drivers/video/backlight/cr_bllcd.c
drivers/video/fbmem.c
drivers/video/imxfb.c
drivers/video/sh_mobile_hdmi.c
drivers/video/sh_mobile_lcdcfb.c
drivers/watchdog/hpwdt.c
drivers/watchdog/rdc321x_wdt.c
fs/ext4/resize.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/ops_inode.c
fs/gfs2/quota.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/xattr.c
fs/logfs/journal.c
fs/logfs/readwrite.c
fs/ocfs2/aops.c
fs/ocfs2/aops.h
fs/ocfs2/cluster/masklog.c
fs/ocfs2/cluster/masklog.h
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/file.c
fs/ocfs2/ocfs2_fs.h
fs/proc/base.c
include/linux/completion.h
include/linux/dmaengine.h
include/linux/ftrace_event.h
include/linux/hrtimer.h
include/linux/init_task.h
include/linux/interrupt.h
include/linux/kprobes.h
include/linux/kthread.h
include/linux/module.h
include/linux/mutex.h
include/linux/netlink.h
include/linux/nmi.h
include/linux/perf_event.h
include/linux/rculist.h
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/sched.h
include/linux/sfi.h
include/linux/stacktrace.h
include/linux/syscalls.h
include/linux/taskstats.h
include/linux/timer.h
include/linux/timerqueue.h [new file with mode: 0644]
include/linux/tracepoint.h
include/linux/unaligned/packed_struct.h
include/linux/workqueue.h
include/media/wm8775.h
include/net/flow.h
include/net/ip6_route.h
include/net/mac80211.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/sock.h
include/trace/define_trace.h
include/trace/events/power.h
include/trace/events/syscalls.h
include/trace/ftrace.h
init/Kconfig
init/do_mounts.c
init/main.c
kernel/cpu.c
kernel/fork.c
kernel/futex.c
kernel/hrtimer.c
kernel/hw_breakpoint.c
kernel/irq/manage.c
kernel/kprobes.c
kernel/kthread.c
kernel/lockdep_proc.c
kernel/module.c
kernel/mutex.c
kernel/perf_event.c
kernel/posix-timers.c
kernel/power/suspend.c
kernel/printk.c
kernel/rcutiny.c
kernel/rcutiny_plugin.h
kernel/rcutorture.c
kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h
kernel/rcutree_trace.c
kernel/sched.c
kernel/sched_autogroup.c [new file with mode: 0644]
kernel/sched_autogroup.h [new file with mode: 0644]
kernel/sched_clock.c
kernel/sched_debug.c
kernel/sched_fair.c
kernel/sched_features.h
kernel/sched_rt.c
kernel/softirq.c
kernel/srcu.c
kernel/sys.c
kernel/sysctl.c
kernel/sysctl_binary.c
kernel/taskstats.c
kernel/time/timecompare.c
kernel/time/timekeeping.c
kernel/time/timer_list.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/power-traces.c
kernel/trace/ring_buffer.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_export.c
kernel/trace/trace_selftest.c
kernel/user.c
kernel/watchdog.c
lib/Kconfig.debug
lib/Makefile
lib/timerqueue.c [new file with mode: 0644]
mm/compaction.c
mm/memcontrol.c
mm/migrate.c
mm/nommu.c
mm/page-writeback.c
mm/percpu.c
net/bluetooth/rfcomm/core.c
net/bridge/br_multicast.c
net/bridge/br_stp_bpdu.c
net/can/bcm.c
net/core/fib_rules.c
net/core/sock.c
net/ipv4/fib_frontend.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/udplite.c
net/ipv6/addrconf.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/ipv6/udp.c
net/ipv6/udplite.c
net/ipv6/xfrm6_output.c
net/irda/af_irda.c
net/mac80211/ibss.c
net/mac80211/rx.c
net/mac80211/work.c
net/sched/sch_sfq.c
net/sctp/socket.c
scripts/Makefile.build
scripts/kconfig/menu.c
scripts/kernel-doc
security/integrity/ima/ima_policy.c
security/keys/request_key.c
sound/core/pcm_lib.c
sound/oss/soundcard.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/soc/codecs/max98088.c
sound/soc/codecs/wm8523.c
sound/soc/codecs/wm8741.c
sound/soc/codecs/wm8753.c
sound/soc/codecs/wm8904.c
sound/soc/codecs/wm8940.c
sound/soc/codecs/wm8955.c
sound/soc/codecs/wm8960.c
sound/soc/codecs/wm8962.c
sound/soc/codecs/wm8971.c
sound/soc/codecs/wm9081.c
sound/soc/codecs/wm9090.c
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-buildid-list.txt
tools/perf/Documentation/perf-diff.txt
tools/perf/Documentation/perf-kvm.txt
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script-perl.txt [new file with mode: 0644]
tools/perf/Documentation/perf-script-python.txt [new file with mode: 0644]
tools/perf/Documentation/perf-script.txt [new file with mode: 0644]
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-test.txt
tools/perf/Documentation/perf-timechart.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace-perl.txt [deleted file]
tools/perf/Documentation/perf-trace-python.txt [deleted file]
tools/perf/Documentation/perf-trace.txt [deleted file]
tools/perf/MANIFEST
tools/perf/Makefile
tools/perf/bench/mem-memcpy-arch.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm-def.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm.S [new file with mode: 0644]
tools/perf/bench/mem-memcpy.c
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-lock.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c [new file with mode: 0644]
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c [deleted file]
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/feature-tests.mak
tools/perf/perf.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
tools/perf/scripts/perl/Perf-Trace-Util/README
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
tools/perf/scripts/perl/bin/failed-syscalls-report
tools/perf/scripts/perl/bin/rw-by-file-report
tools/perf/scripts/perl/bin/rw-by-pid-report
tools/perf/scripts/perl/bin/rwtop-report
tools/perf/scripts/perl/bin/wakeup-latency-report
tools/perf/scripts/perl/bin/workqueue-stats-report
tools/perf/scripts/perl/check-perf-trace.pl
tools/perf/scripts/perl/rw-by-file.pl
tools/perf/scripts/perl/workqueue-stats.pl
tools/perf/scripts/python/Perf-Trace-Util/Context.c
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
tools/perf/scripts/python/bin/futex-contention-report
tools/perf/scripts/python/bin/netdev-times-report
tools/perf/scripts/python/bin/sched-migration-report
tools/perf/scripts/python/bin/sctop-report
tools/perf/scripts/python/bin/syscall-counts-by-pid-report
tools/perf/scripts/python/bin/syscall-counts-report
tools/perf/scripts/python/check-perf-trace.py
tools/perf/scripts/python/failed-syscalls-by-pid.py
tools/perf/scripts/python/sched-migration.py
tools/perf/scripts/python/sctop.py
tools/perf/scripts/python/syscall-counts-by-pid.py
tools/perf/scripts/python/syscall-counts.py
tools/perf/util/build-id.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c [new file with mode: 0644]
tools/perf/util/evsel.h [new file with mode: 0644]
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/asm/cpufeature.h [new file with mode: 0644]
tools/perf/util/include/asm/dwarf2.h [new file with mode: 0644]
tools/perf/util/include/linux/bitops.h
tools/perf/util/include/linux/linkage.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-options.h
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/string.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event.h
tools/perf/util/ui/util.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/perf/util/xyarray.c [new file with mode: 0644]
tools/perf/util/xyarray.h [new file with mode: 0644]

diff --git a/CREDITS b/CREDITS
index 41d8e63d5165b5b786db6ab7d8c14fbc49fc0107..494b6e4746d7b9d08f0334bab61651341d285068 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD  841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
index a851118775d84c7a1d2356ba6a6c8e6208292887..6a8c73f55b80ca38601ba96f179565fe8b0b7ea0 100644 (file)
@@ -1,18 +1,22 @@
 CONFIG_RCU_TRACE debugfs Files and Formats
 
 
-The rcutree implementation of RCU provides debugfs trace output that
-summarizes counters and state.  This information is useful for debugging
-RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats.
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.
 
 
-Hierarchical RCU debugfs Files and Formats
+CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-This implementation of RCU provides three debugfs files under the
+These implementations of RCU provide five debugfs files under the
 top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcugp (which displays grace-period counters), and
-rcu/rcuhier (which displays the struct rcu_node hierarchy).
+rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
+rcu/rcudata), rcu/rcugp (which displays grace-period counters),
+rcu/rcuhier (which displays the struct rcu_node hierarchy), and
+rcu/rcu_pending (which displays counts of the reasons that the
+rcu_pending() function decided that there was core RCU work to do).
 
 The output of "cat rcu/rcudata" looks as follows:
 
@@ -130,7 +134,8 @@ o   "ci" is the number of RCU callbacks that have been invoked for
        been registered in absence of CPU-hotplug activity.
 
 o      "co" is the number of RCU callbacks that have been orphaned due to
-       this CPU going offline.
+       this CPU going offline.  These orphaned callbacks have been moved
+       to an arbitrarily chosen online CPU.
 
 o      "ca" is the number of RCU callbacks that have been adopted due to
        other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o "gpnum" is the number of grace periods that have started.  It is
 
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
 1/1 .>. 0:127 ^0    
 3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
 3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3    
 rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
 0/1 .>. 0:127 ^0    
 0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
 0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o  "fqlh" is the number of calls to force_quiescent_state() that
        exited immediately (without even being counted in nfqs above)
        due to contention on ->fqslock.
 
-o      "oqlen" is the number of callbacks on the "orphan" callback
-       list.  RCU callbacks are placed on this list by CPUs going
-       offline, and are "adopted" either by the CPU helping the outgoing
-       CPU or by the next rcu_barrier*() call, whichever comes first.
-
 o      Each element of the form "1/1 0:127 ^0" represents one struct
        rcu_node.  Each line represents one level of the hierarchy, from
        root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o "nn" is the number of times that this CPU needed nothing.  Alert
        readers will note that the rcu "nn" number for a given CPU very
        closely matches the rcu_bh "np" number for that same CPU.  This
        is due to short-circuit evaluation in rcu_pending().
+
+
+CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
+rcu_preempt_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
+             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
+             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
+             exp balk: bt=0 nos=0
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
+rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
+The last three lines of the rcu_preempt section appear only in
+CONFIG_RCU_BOOST kernel builds.  The fields are as follows:
+
+o      "qlen" is the number of RCU callbacks currently waiting either
+       for an RCU grace period or waiting to be invoked.  This is the
+       only field present for rcu_sched and rcu_bh, due to the
+       short-circuiting of grace periods in those two cases.
+
+o      "gp" is the number of grace periods that have completed.
+
+o      "g197/p197/c197" displays the grace-period state, with the
+       "g" number being the number of grace periods that have started
+       (mod 256), the "p" number being the number of grace periods
+       that the CPU has responded to (also mod 256), and the "c"
+       number being the number of grace periods that have completed
+       (once again mod 256).
+
+       Why have both "gp" and "g"?  Because the data flowing into
+       "gp" is only present in a CONFIG_RCU_TRACE kernel.
+
+o      "tasks" is a set of bits.  The first bit is "T" if there are
+       currently tasks that have recently blocked within an RCU
+       read-side critical section, the second bit is "N" if any of the
+       aforementioned tasks are blocking the current RCU grace period,
+       and the third bit is "E" if any of the aforementioned tasks are
+       blocking the current expedited grace period.  Each bit is "."
+       if the corresponding condition does not hold.
+
+o      "ttb" is a single bit.  It is "B" if any of the blocked tasks
+       need to be priority boosted and "." otherwise.
+
+o      "btg" indicates whether boosting has been carried out during
+       the current grace period, with "exp" indicating that boosting
+       is in progress for an expedited grace period, "no" indicating
+       that boosting has not yet started for a normal grace period,
+       "begun" indicating that boosting has bebug for a normal grace
+       period, and "done" indicating that boosting has completed for
+       a normal grace period.
+
+o      "ntb" is the total number of tasks subjected to RCU priority boosting
+       periods since boot.
+
+o      "neb" is the number of expedited grace periods that have had
+       to resort to RCU priority boosting since boot.
+
+o      "nnb" is the number of normal grace periods that have had
+       to resort to RCU priority boosting since boot.
+
+o      "j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+
+o      "bt" is the low-order 12 bits of the value that the jiffies counter
+       will have at the next time that boosting is scheduled to begin.
+
+o      In the line beginning with "normal balk", the fields are as follows:
+
+       o       "nt" is the number of times that the system balked from
+               boosting because there were no blocked tasks to boost.
+               Note that the system will balk from boosting even if the
+               grace period is overdue when the currently running task
+               is looping within an RCU read-side critical section.
+               There is no point in boosting in this case, because
+               boosting a running task won't make it run any faster.
+
+       o       "gt" is the number of times that the system balked
+               from boosting because, although there were blocked tasks,
+               none of them were preventing the current grace period
+               from completing.
+
+       o       "bt" is the number of times that the system balked
+               from boosting because boosting was already in progress.
+
+       o       "b" is the number of times that the system balked from
+               boosting because boosting had already completed for
+               the grace period in question.
+
+       o       "ny" is the number of times that the system balked from
+               boosting because it was not yet time to start boosting
+               the grace period in question.
+
+       o       "nos" is the number of times that the system balked from
+               boosting for inexplicable ("not otherwise specified")
+               reasons.  This can actually happen due to races involving
+               increments of the jiffies counter.
+
+o      In the line beginning with "exp balk", the fields are as follows:
+
+       o       "bt" is the number of times that the system balked from
+               boosting because there were no blocked tasks to boost.
+
+       o       "nos" is the number of times that the system balked from
+                boosting for inexplicable ("not otherwise specified")
+                reasons.
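
As a quick way to eyeball these counters on a running system, here is a
minimal userspace sketch; it assumes debugfs is mounted at /sys/kernel/debug
and the kernel was built with CONFIG_RCU_TRACE (on other configurations the
path below will not exist), and simply dumps the rcu/rcudata file described
above:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/kernel/debug/rcu/rcudata", "r");
            char line[512];

            if (!f) {
                    perror("rcu/rcudata");
                    return 1;
            }
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* e.g. "rcu_sched: qlen: 0" */
            fclose(f);
            return 0;
    }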
index a2976a6de033df2b4247f69b48f56f44abed3378..e9c77788a39d8f2c5c807b59295be6c3f1b3fea6 100644 (file)
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
                        default:
                                fprintf(stderr, "Unknown nla_type %d\n",
                                        na->nla_type);
+                       case TASKSTATS_TYPE_NULL:
                                break;
                        }
                        na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
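
The intent of the hunk above is easier to see in isolation: the new
TASKSTATS_TYPE_NULL label is placed after the default body, so padding
attributes skip the "Unknown nla_type" warning while both arms share one
break. A standalone sketch of the pattern (TASKSTATS_TYPE_NULL's value is
copied locally so the sketch compiles without kernel headers):

    #include <stdio.h>

    #define TASKSTATS_TYPE_NULL 6   /* local stand-in for linux/taskstats.h */

    static void handle(int nla_type)
    {
            switch (nla_type) {
            /* ... known attribute types handled in earlier cases ... */
            default:
                    fprintf(stderr, "Unknown nla_type %d\n", nla_type);
                    /* falls through to the shared break */
            case TASKSTATS_TYPE_NULL:
                    break;  /* padding attribute: skipped silently */
            }
    }

    int main(void)
    {
            handle(TASKSTATS_TYPE_NULL);    /* prints nothing */
            handle(42);                     /* prints "Unknown nla_type 42" */
            return 0;
    }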
index d9bcffd594331d7b52a5608269327a7c6642af31..470d3dba1a69aa48c55d98bea3c70d094f458de6 100644 (file)
@@ -62,6 +62,10 @@ aic7*reg_print.c*
 aic7*seq.h*
 aicasm
 aicdb.h*
+altivec1.c
+altivec2.c
+altivec4.c
+altivec8.c
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
@@ -76,6 +80,7 @@ btfixupprep
 build
 bvmlinux
 bzImage*
+capflags.c
 classlist.h*
 comp*.log
 compile.h*
@@ -94,6 +99,7 @@ devlist.h*
 docproc
 elf2ecoff
 elfconfig.h*
+evergreen_reg_safe.h
 fixdep
 flask.h
 fore200e_mkfirm
@@ -108,9 +114,16 @@ genksyms
 *_gray256.c
 ihex2fw
 ikconfig.h*
+inat-tables.c
 initramfs_data.cpio
 initramfs_data.cpio.gz
 initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
 kallsyms
 kconfig
 keywords.c
@@ -140,6 +153,7 @@ mkprep
 mktables
 mktree
 modpost
+modules.builtin
 modules.order
 modversions.h*
 ncscope.*
@@ -153,14 +167,23 @@ pca200e.bin
 pca200e_ecd.bin2
 piggy.gz
 piggyback
+piggy.S
 pnmtologo
 ppc_defs.h*
 pss_boot.h
 qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
 raid6altivec*.c
 raid6int*.c
 raid6tables.c
 relocs
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
 series
 setup
 setup.bin
@@ -169,6 +192,7 @@ sImage
 sm_tbl*
 split-include
 syscalltab.h
+tables.c
 tags
 tftpboot.img
 timeconst.h
@@ -190,6 +214,7 @@ vmlinux
 vmlinux-*
 vmlinux.aout
 vmlinux.lds
+voffset.h
 vsyscall.lds
 vsyscall_32.lds
 wanxlfw.inc
@@ -200,3 +225,4 @@ wakeup.elf
 wakeup.lds
 zImage*
 zconf.hash.c
+zoffset.h
index b6426f15b4ae85f5469b962806004237b9b5607e..33fa3e5d38fd7480d2ddd136b68f77fef7734c1d 100644 (file)
@@ -18,7 +18,6 @@ prototypes:
        char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
 
 locking rules:
-       none have BKL
                dcache_lock     rename_lock     ->d_lock        may block
 d_revalidate:  no              no              no              yes
 d_hash         no              no              no              yes
@@ -42,18 +41,23 @@ ata *);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
        int (*readlink) (struct dentry *, char __user *,int);
-       int (*follow_link) (struct dentry *, struct nameidata *);
+       void * (*follow_link) (struct dentry *, struct nameidata *);
+       void (*put_link) (struct dentry *, struct nameidata *, void *);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, struct nameidata *);
+       int (*check_acl)(struct inode *, int);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
+       void (*truncate_range)(struct inode *, loff_t, loff_t);
+       long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
+       int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
-       all may block, none have BKL
+       all may block
                i_mutex(inode)
 lookup:                yes
 create:                yes
@@ -66,19 +70,24 @@ rmdir:              yes (both)      (see below)
 rename:                yes (all)       (see below)
 readlink:      no
 follow_link:   no
+put_link:      no
 truncate:      yes             (see below)
 setattr:       yes
 permission:    no
+check_acl:     no
 getattr:       no
 setxattr:      yes
 getxattr:      no
 listxattr:     no
 removexattr:   yes
+truncate_range:        yes
+fallocate:     no
+fiemap:                no
        Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
        cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
        ->truncate() is never called directly - it's a callback, not a
-method. It's called by vmtruncate() - library function normally used by
+method. It's called by vmtruncate() - deprecated library function used by
 ->setattr(). Locking information above applies to that call (i.e. is
 inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
 passed).
@@ -91,7 +100,7 @@ prototypes:
        struct inode *(*alloc_inode)(struct super_block *sb);
        void (*destroy_inode)(struct inode *);
        void (*dirty_inode) (struct inode *);
-       int (*write_inode) (struct inode *, int);
+       int (*write_inode) (struct inode *, struct writeback_control *wbc);
        int (*drop_inode) (struct inode *);
        void (*evict_inode) (struct inode *);
        void (*put_super) (struct super_block *);
@@ -105,10 +114,10 @@ prototypes:
        int (*show_options)(struct seq_file *, struct vfsmount *);
        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+       int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
 
 locking rules:
        All may block [not true, see below]
-       None have BKL
                        s_umount
 alloc_inode:
 destroy_inode:
@@ -127,6 +136,7 @@ umount_begin:               no
 show_options:          no              (namespace_sem)
 quota_read:            no              (see below)
 quota_write:           no              (see below)
+bdev_try_to_free_page: no              (see below)
 
 ->statfs() has s_umount (shared) when called by ustat(2) (native or
 compat), but that's an accident of bad API; s_umount is used to pin
@@ -139,19 +149,25 @@ be the only ones operating on the quota file by the quota code (via
 dqio_sem) (unless an admin really wants to screw up something and
 writes to quota files with quotas on). For other details about locking
 see also dquot_operations section.
+->bdev_try_to_free_page is called from the ->releasepage handler of
+the block device inode.  See there for more details.
 
 --------------------------- file_system_type ---------------------------
 prototypes:
        int (*get_sb) (struct file_system_type *, int,
                       const char *, void *, struct vfsmount *);
+       struct dentry *(*mount) (struct file_system_type *, int,
+                      const char *, void *);
        void (*kill_sb) (struct super_block *);
 locking rules:
-               may block       BKL
-get_sb         yes             no
-kill_sb                yes             no
+               may block
+get_sb         yes
+mount          yes
+kill_sb                yes
 
 ->get_sb() returns error or 0 with locked superblock attached to the vfsmount
 (exclusive on ->s_umount).
+->mount() returns ERR_PTR or the root dentry.
 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
 unlocks and drops the reference.
 
@@ -176,27 +192,35 @@ prototypes:
        void (*freepage)(struct page *);
        int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs);
-       int (*launder_page) (struct page *);
+       int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
+                               unsigned long *);
+       int (*migratepage)(struct address_space *, struct page *, struct page *);
+       int (*launder_page)(struct page *);
+       int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+       int (*error_remove_page)(struct address_space *, struct page *);
 
 locking rules:
        All except set_page_dirty and freepage may block
 
-                       BKL     PageLocked(page)        i_mutex
-writepage:             no      yes, unlocks (see below)
-readpage:              no      yes, unlocks
-sync_page:             no      maybe
-writepages:            no
-set_page_dirty         no      no
-readpages:             no
-write_begin:           no      locks the page          yes
-write_end:             no      yes, unlocks            yes
-perform_write:         no      n/a                     yes
-bmap:                  no
-invalidatepage:                no      yes
-releasepage:           no      yes
-freepage:              no      yes
-direct_IO:             no
-launder_page:          no      yes
+                       PageLocked(page)        i_mutex
+writepage:             yes, unlocks (see below)
+readpage:              yes, unlocks
+sync_page:             maybe
+writepages:
+set_page_dirty         no
+readpages:
+write_begin:           locks the page          yes
+write_end:             yes, unlocks            yes
+bmap:
+invalidatepage:                yes
+releasepage:           yes
+freepage:              yes
+direct_IO:
+get_xip_mem:                                   maybe
+migratepage:           yes (both)
+launder_page:          yes
+is_partially_uptodate: yes
+error_remove_page:     yes
 
        ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
 may be called from the request handler (/dev/loop).
@@ -276,9 +300,8 @@ under spinlock (it cannot block) and is sometimes called with the page
 not locked.
 
        ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
-filesystems and by the swapper. The latter will eventually go away. All
-instances do not actually need the BKL. Please, keep it that way and don't
-breed new callers.
+filesystems and by the swapper. The latter will eventually go away.  Please,
+keep it that way and don't breed new callers.
 
        ->invalidatepage() is called when the filesystem must attempt to drop
 some or all of the buffers from the page when it is being truncated.  It
@@ -299,47 +322,37 @@ cleaned, or an error value if not. Note that in order to prevent the page
 getting mapped back in and redirtied, it needs to be kept locked
 across the entire operation.
 
-       Note: currently almost all instances of address_space methods are
-using BKL for internal serialization and that's one of the worst sources
-of contention. Normally they are calling library functions (in fs/buffer.c)
-and pass foo_get_block() as a callback (on local block-based filesystems,
-indeed). BKL is not needed for library stuff and is usually taken by
-foo_get_block(). It's an overkill, since block bitmaps can be protected by
-internal fs locking and real critical areas are much smaller than the areas
-filesystems protect now.
-
 ----------------------- file_lock_operations ------------------------------
 prototypes:
-       void (*fl_insert)(struct file_lock *);  /* lock insertion callback */
-       void (*fl_remove)(struct file_lock *);  /* lock removal callback */
        void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
        void (*fl_release_private)(struct file_lock *);
 
 
 locking rules:
-                       BKL     may block
-fl_insert:             yes     no
-fl_remove:             yes     no
-fl_copy_lock:          yes     no
-fl_release_private:    yes     yes
+                       file_lock_lock  may block
+fl_copy_lock:          yes             no
+fl_release_private:    maybe           no
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
        int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
        void (*fl_notify)(struct file_lock *);  /* unblock callback */
+       int (*fl_grant)(struct file_lock *, struct file_lock *, int);
        void (*fl_release_private)(struct file_lock *);
        void (*fl_break)(struct file_lock *); /* break_lease callback */
+       int (*fl_mylease)(struct file_lock *, struct file_lock *);
+       int (*fl_change)(struct file_lock **, int);
 
 locking rules:
-                       BKL     may block
-fl_compare_owner:      yes     no
-fl_notify:             yes     no
-fl_release_private:    yes     yes
-fl_break:              yes     no
-
-       Currently only NFSD and NLM provide instances of this class. None of the
-them block. If you have out-of-tree instances - please, show up. Locking
-in that area will change.
+                       file_lock_lock  may block
+fl_compare_owner:      yes             no
+fl_notify:             yes             no
+fl_grant:              no              no
+fl_release_private:    maybe           no
+fl_break:              yes             no
+fl_mylease:            yes             no
+fl_change              yes             no
+
 --------------------------- buffer_head -----------------------------------
 prototypes:
        void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -364,17 +377,17 @@ prototypes:
        void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 
 locking rules:
-                       BKL     bd_mutex
-open:                  no      yes
-release:               no      yes
-ioctl:                 no      no
-compat_ioctl:          no      no
-direct_access:         no      no
-media_changed:         no      no
-unlock_native_capacity:        no      no
-revalidate_disk:       no      no
-getgeo:                        no      no
-swap_slot_free_notify: no      no      (see below)
+                       bd_mutex
+open:                  yes
+release:               yes
+ioctl:                 no
+compat_ioctl:          no
+direct_access:         no
+media_changed:         no
+unlock_native_capacity:        no
+revalidate_disk:       no
+getgeo:                        no
+swap_slot_free_notify: no      (see below)
 
 media_changed, unlock_native_capacity and revalidate_disk are called only from
 check_disk_change().
@@ -413,34 +426,21 @@ prototypes:
        unsigned long (*get_unmapped_area)(struct file *, unsigned long,
                        unsigned long, unsigned long, unsigned long);
        int (*check_flags)(int);
+       int (*flock) (struct file *, int, struct file_lock *);
+       ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
+                       size_t, unsigned int);
+       ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
+                       size_t, unsigned int);
+       int (*setlease)(struct file *, long, struct file_lock **);
 };
 
 locking rules:
-       All may block.
-                       BKL
-llseek:                        no      (see below)
-read:                  no
-aio_read:              no
-write:                 no
-aio_write:             no
-readdir:               no
-poll:                  no
-unlocked_ioctl:                no
-compat_ioctl:          no
-mmap:                  no
-open:                  no
-flush:                 no
-release:               no
-fsync:                 no      (see below)
-aio_fsync:             no
-fasync:                        no
-lock:                  yes
-readv:                 no
-writev:                        no
-sendfile:              no
-sendpage:              no
-get_unmapped_area:     no
-check_flags:           no
+       All may block except for ->setlease.
+       No VFS locks held on entry except for ->fsync and ->setlease.
+
+->fsync() has i_mutex on inode.
+
+->setlease has the file_list_lock held and must not sleep.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
@@ -450,17 +450,10 @@ mutex or just to use i_size_read() instead.
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.
 
-Note: ext2_release() was *the* source of contention on fs-intensive
-loads and dropping BKL on ->release() helps to get rid of that (we still
-grab BKL for cases when we close a file that had been opened r/w, but that
-can and should be done using the internal locking with smaller critical areas).
-Current worst offender is ext2_get_block()...
-
-->fasync() is called without BKL protection, and is responsible for
-maintaining the FASYNC bit in filp->f_flags.  Most instances call
-fasync_helper(), which does that maintenance, so it's not normally
-something one needs to worry about.  Return values > 0 will be mapped to
-zero in the VFS layer.
+->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
+Most instances call fasync_helper(), which does that maintenance, so it's
+not normally something one needs to worry about.  Return values > 0 will be
+mapped to zero in the VFS layer.
 
 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
 move ->readdir() to inode_operations and use a separate method for directory
@@ -471,8 +464,6 @@ components. And there are other reasons why the current interface is a mess...
 ->read on directories probably must go away - we should just enforce -EISDIR
 in sys_read() and friends.
 
-->fsync() has i_mutex on inode.
-
 --------------------------- dquot_operations -------------------------------
 prototypes:
        int (*write_dquot) (struct dquot *);
@@ -507,12 +498,12 @@ prototypes:
        int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
-               BKL     mmap_sem        PageLocked(page)
-open:          no      yes
-close:         no      yes
-fault:         no      yes             can return with page locked
-page_mkwrite:  no      yes             can return with page locked
-access:                no      yes
+               mmap_sem        PageLocked(page)
+open:          yes
+close:         yes
+fault:         yes             can return with page locked
+page_mkwrite:  yes             can return with page locked
+access:                yes
 
        ->fault() is called when a previously not present pte is about
 to be faulted in. The filesystem must find and return the page associated
@@ -539,6 +530,3 @@ VM_IO | VM_PFNMAP VMAs.
 
 (if you break something or notice that it is broken and do not fix it yourself
 - at least put it here)
-
-ipc/shm.c::shm_delete() - may need BKL.
-->read() and ->write() in many drivers are (probably) missing BKL.
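
As the ->fasync() note above says, most instances are thin wrappers around
fasync_helper(). A hedged sketch of the usual pattern (my_dev and its
async_queue field are hypothetical driver state, not a kernel API):

    #include <linux/fs.h>

    struct my_dev {
            struct fasync_struct *async_queue;      /* hypothetical state */
    };

    /* Typical ->fasync(): delegate the FASYNC bookkeeping in
     * filp->f_flags to fasync_helper(), as described above. */
    static int my_fasync(int fd, struct file *filp, int on)
    {
            struct my_dev *dev = filp->private_data;

            return fasync_helper(fd, filp, on, &dev->async_queue);
    }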
index 715eaaf1519dd25fa0d4011684cd263bc12b2d2f..9a8674629a07598eb553970e1207b31b42ec0316 100644 (file)
        Notes: Further information in
        http://www.oreilly.com/catalog/linuxdrive2/
 
-     * Title: "Linux Device Drivers, 3nd Edition"
+     * Title: "Linux Device Drivers, 3rd Edition"
        Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
        Publisher: O'Reilly & Associates.
        Date: 2005.
        Pages: 600.
        ISBN: 0-13-101908-2
 
-     * Title:  "The  Design  and Implementation of the 4.4 BSD UNIX
-       Operating System"
-       Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
-       John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1996.
-       ISBN: 0-201-54979-4
-
      * Title: "Programming for the real world - POSIX.4"
        Author: Bill O. Gallmeister.
        Publisher: O'Reilly & Associates, Inc..
        POSIX. Good reference.
 
      * Title:  "UNIX  Systems  for  Modern Architectures: Symmetric
-       Multiprocesssing and Caching for Kernel Programmers"
+       Multiprocessing and Caching for Kernel Programmers"
        Author: Curt Schimmel.
        Publisher: Addison Wesley.
        Date: June, 1994.
        Pages: 432.
        ISBN: 0-201-63338-8
 
-     * Title:  "The  Design  and Implementation of the 4.3 BSD UNIX
-       Operating System"
-       Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
-       Karels, John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1989 (reprinted with corrections on October, 1990).
-       ISBN: 0-201-06196-1
-
-     * Title: "The Design of the UNIX Operating System"
-       Author: Maurice J. Bach.
-       Publisher: Prentice Hall.
-       Date: 1986.
-       Pages: 471.
-       ISBN: 0-13-201757-1
-
      MISCELLANEOUS:
 
      * Name: linux/Documentation
index 8b61c93609994dd91e36c25e1b29647ad084eaff..f3dc951e949f04255d90f35b5da4b78c7d015a67 100644 (file)
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nmi_watchdog=   [KNL,BUGS=X86] Debugging features for SMP kernels
                        Format: [panic,][num]
-                       Valid num: 0,1,2
+                       Valid num: 0
                        0 - turn nmi_watchdog off
-                       1 - use the IO-APIC timer for the NMI watchdog
-                       2 - use the local APIC for the NMI watchdog using
-                       a performance counter. Note: This will use one
-                       performance counter and the local APIC's performance
-                       vector.
                        When panic is specified, panic when an NMI watchdog
                        timeout occurs.
                        This is useful when you use a panic=... timeout and
                        need the box quickly up again.
-                       Instead of 1 and 2 it is possible to use the following
-                       symbolic names: lapic and ioapic
-                       Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
        netpoll.carrier_timeout=
                        [NET] Specifies amount of time (in seconds) that
@@ -1622,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
        noapic          [SMP,APIC] Tells the kernel to not make use of any
                        IOAPICs that may be present in the system.
 
+       noautogroup     Disable scheduler automatic task group creation.
+
        nobats          [PPC] Do not use BATs for mapping kernel lowmem
                        on "Classic" PPC cores.
 
@@ -1759,7 +1753,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nousb           [USB] Disable the USB subsystem
 
-       nowatchdog      [KNL] Disable the lockup detector.
+       nowatchdog      [KNL] Disable the lockup detector (NMI watchdog).
 
        nowb            [ARM]
 
@@ -2467,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
                        to facilitate early boot debugging.
                        See also Documentation/trace/events.txt
 
-       tsc=            Disable clocksource-must-verify flag for TSC.
+       tsc=            Disable clocksource stability checks for TSC.
                        Format: <string>
                        [x86] reliable: mark tsc clocksource as reliable, this
-                       disables clocksource verification at runtime.
-                       Used to enable high-resolution timer mode on older
-                       hardware, and in virtualized environment.
+                       disables clocksource verification at runtime, as well
+                       as the stability checks done at bootup. Used to enable
+                       high-resolution timer mode on older hardware, and in
+                       virtualized environments.
                        [x86] noirqtime: Do not use TSC to do irq accounting.
                        Used to run time disable IRQ_TIME_ACCOUNTING on any
                        platforms where RDTSC is slow and this accounting
index 570ef2b3d79b16c2f57c01f88d5de3a1a954f6a4..df322c1034667eebbced0792f2c8a7c733e5cf78 100644 (file)
@@ -1044,9 +1044,9 @@ Details:
 
 
 /**
- *      queuecommand - queue scsi command, invoke 'done' on completion
+ *      queuecommand - queue scsi command, invoke scp->scsi_done on completion
+ *      @shost: pointer to the scsi host object
  *      @scp: pointer to scsi command object
- *      @done: function pointer to be invoked on completion
  *
  *      Returns 0 on success.
  *
@@ -1074,42 +1074,45 @@ Details:
  *
  *      Other types of errors that are detected immediately may be
  *      flagged by setting scp->result to an appropriate value,
- *      invoking the 'done' callback, and then returning 0 from this
- *      function. If the command is not performed immediately (and the
- *      LLD is starting (or will start) the given command) then this
- *      function should place 0 in scp->result and return 0.
+ *      invoking the scp->scsi_done callback, and then returning 0
+ *      from this function. If the command is not performed
+ *      immediately (and the LLD is starting (or will start) the given
+ *      command) then this function should place 0 in scp->result and
+ *      return 0.
  *
  *      Command ownership.  If the driver returns zero, it owns the
- *      command and must take responsibility for ensuring the 'done'
- *      callback is executed.  Note: the driver may call done before
- *      returning zero, but after it has called done, it may not
- *      return any value other than zero.  If the driver makes a
- *      non-zero return, it must not execute the command's done
- *      callback at any time.
- *
- *      Locks: struct Scsi_Host::host_lock held on entry (with "irqsave")
- *             and is expected to be held on return.
+ *      command and must take responsibility for ensuring the
+ *      scp->scsi_done callback is executed.  Note: the driver may
+ *      call scp->scsi_done before returning zero, but after it has
+ *      called scp->scsi_done, it may not return any value other than
+ *      zero.  If the driver makes a non-zero return, it must not
+ *      execute the command's scsi_done callback at any time.
+ *
+ *      Locks: up to and including 2.6.36, struct Scsi_Host::host_lock
+ *             held on entry (with "irqsave") and is expected to be
+ *             held on return. From 2.6.37 onwards, queuecommand is
+ *             called without any locks held.
  *
  *      Calling context: in interrupt (soft irq) or process context
  *
- *      Notes: This function should be relatively fast. Normally it will
- *      not wait for IO to complete. Hence the 'done' callback is invoked 
- *      (often directly from an interrupt service routine) some time after
- *      this function has returned. In some cases (e.g. pseudo adapter 
- *      drivers that manufacture the response to a SCSI INQUIRY)
- *      the 'done' callback may be invoked before this function returns.
- *      If the 'done' callback is not invoked within a certain period
- *      the SCSI mid level will commence error processing.
- *      If a status of CHECK CONDITION is placed in "result" when the
- *      'done' callback is invoked, then the LLD driver should 
- *      perform autosense and fill in the struct scsi_cmnd::sense_buffer
+ *      Notes: This function should be relatively fast. Normally it
+ *      will not wait for IO to complete. Hence the scp->scsi_done
+ *      callback is invoked (often directly from an interrupt service
+ *      routine) some time after this function has returned. In some
+ *      cases (e.g. pseudo adapter drivers that manufacture the
+ *      response to a SCSI INQUIRY) the scp->scsi_done callback may be
+ *      invoked before this function returns.  If the scp->scsi_done
+ *      callback is not invoked within a certain period the SCSI mid
+ *      level will commence error processing.  If a status of CHECK
+ *      CONDITION is placed in "result" when the scp->scsi_done
+ *      callback is invoked, then the LLD driver should perform
+ *      autosense and fill in the struct scsi_cmnd::sense_buffer
  *      array. The scsi_cmnd::sense_buffer array is zeroed prior to
  *      the mid level queuing a command to an LLD.
  *
  *      Defined in: LLD
  **/
-    int queuecommand(struct scsi_cmnd * scp, 
-                     void (*done)(struct scsi_cmnd *))
+    int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp)
 
 
 /**
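
For illustration only -- not part of the patch: a minimal sketch of an LLD
implementing the new two-argument prototype described above. The my_hba
structure and the my_hw_busy()/my_hw_queue() helpers are hypothetical
placeholders.

    static int my_lld_queuecommand(struct Scsi_Host *shost,
                                   struct scsi_cmnd *scp)
    {
        struct my_hba *hba = shost_priv(shost);  /* driver-private data */

        if (my_hw_busy(hba))
            return SCSI_MLQUEUE_HOST_BUSY;       /* mid layer requeues */

        scp->result = 0;
        if (my_hw_queue(hba, scp) < 0) {
            /* immediate failure: set result, complete, then return 0 */
            scp->result = DID_ERROR << 16;
            scp->scsi_done(scp);
        }
        return 0;                                /* command is now owned */
    }
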
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt
new file mode 100644 (file)
index 0000000..96d87b6
--- /dev/null
@@ -0,0 +1,90 @@
+
+                       Subsystem Trace Points: power
+
+The power tracing system captures events related to power transitions
+within the kernel. Broadly speaking, there are three major categories:
+
+  o Power state switch which reports events related to suspend (S-states),
+     cpuidle (C-states) and cpufreq (P-states)
+  o System clock related changes
+  o Power domains related changes and transitions
+
+This document describes what each of the tracepoints is and why it
+might be useful.
+
+Cf. include/trace/events/power.h for the event definitions.
+
+1. Power state switch events
+============================
+
+1.1 New trace API
+-----------------
+
+A 'cpu' event class gathers the CPU-related events: cpuidle and
+cpufreq.
+
+cpu_idle               "state=%lu cpu_id=%lu"
+cpu_frequency          "state=%lu cpu_id=%lu"
+
+A suspend event is used to indicate the system going in and out of the
+suspend mode:
+
+machine_suspend                "state=%lu"
+
+
+Note: the value of '-1' or '4294967295' for state means an exit from the current state,
+i.e. trace_cpu_idle(4, smp_processor_id()) means that the system
+enters the idle state 4, while trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id())
+means that the system exits the previous idle state.
+
+The event carrying 'state=4294967295' in the trace is important to the
+user space tools which use it to detect the end of the current state, and
+thus to draw the state diagrams correctly and to calculate accurate statistics.
+
+1.2 DEPRECATED trace API
+------------------------
+
+A new Kconfig option CONFIG_EVENT_POWER_TRACING_DEPRECATED with the default value of
+'y' has been created. This allows the legacy trace power API to be used alongside
+with the new trace API.
+The Kconfig option, the old trace API (in include/trace/events/power.h) and the
+old trace points will disappear in a future release (namely 2.6.41).
+
+power_start            "type=%lu state=%lu cpu_id=%lu"
+power_frequency                "type=%lu state=%lu cpu_id=%lu"
+power_end              "cpu_id=%lu"
+
+The 'type' parameter takes one of these macros:
+ . POWER_NONE  = 0,
+ . POWER_CSTATE        = 1,    /* C-State */
+ . POWER_PSTATE        = 2,    /* Frequency change or DVFS */
+
+The 'state' parameter is set depending on the type:
+ . Target C-state for type=POWER_CSTATE,
+ . Target frequency for type=POWER_PSTATE,
+
+power_end is used to indicate the exit of a state, corresponding to the latest
+power_start event.
+
+2. Clocks events
+================
+The clock events are used for clock enable/disable and for
+clock rate change.
+
+clock_enable           "%s state=%lu cpu_id=%lu"
+clock_disable          "%s state=%lu cpu_id=%lu"
+clock_set_rate         "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the clock name (e.g. "gpio1_iclk").
+The second parameter is '1' for enable, '0' for disable, or the target
+clock rate for set_rate.
+
+3. Power domains events
+=======================
+The power domain events are used for power domain transitions.
+
+power_domain_target    "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the power domain name (e.g. "mpu_pwrdm").
+The second parameter is the power domain target state.
+
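
For illustration, a minimal sketch (not from the documentation above) of a
driver emitting the new cpu_idle tracepoint around an idle-state entry,
using PWR_EVENT_EXIT to mark the exit as described in section 1.1;
enter_idle_state() and the hardware idle hook are hypothetical:

    #include <trace/events/power.h>

    static void enter_idle_state(int state)
    {
        trace_cpu_idle(state, smp_processor_id());
        /* ... architecture-specific idle entry, returns on wakeup ... */
        trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
    }
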
index b3e73ddb1567902fdeb9d65e1a661db9bca7780d..12cecc83cd91c658c71524bba59762b83f810829 100644 (file)
@@ -373,9 +373,18 @@ EVENT_PROCESS:
                                print "         $regex_lru_isolate/o\n";
                                next;
                        }
+                       my $isolate_mode = $1;
                        my $nr_scanned = $4;
                        my $nr_contig_dirty = $7;
-                       $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+
+                       # To more closely match vmstat scanning statistics, only count isolate_both
+                       # and isolate_inactive as scanning. isolate_active is rotation
+                       # isolate_inactive == 0
+                       # isolate_active   == 1
+                       # isolate_both     == 2
+                       if ($isolate_mode != 1) {
+                               $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+                       }
                        $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
                } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
                        $details = $5;
index 30b43e1b26979cee024aa7636e250520b8ba235f..bdeb81ccb5f61b973280f9704f28a9f9cd8cc2e1 100644 (file)
@@ -600,6 +600,7 @@ Protocol:   2.07+
   0x00000001   lguest
   0x00000002   Xen
   0x00000003   Moorestown MID
+  0x00000004   CE4100 TV Platform
 
 Field name:    hardware_subarch_data
 Type:          write (subarch-dependent)
index 6a588873cf8d2da8b00f803a1dcd8d96841292bd..c5c7292daba076e439843e3b8c5c09a5d96d2285 100644 (file)
@@ -405,7 +405,7 @@ S:  Supported
 F:     drivers/usb/gadget/amd5536udc.*
 
 AMD GEODE PROCESSOR/CHIPSET SUPPORT
-P:     Jordan Crouse
+P:     Andres Salomon <dilinger@queued.net>
 L:     linux-geode@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:     Supported
@@ -792,11 +792,14 @@ S:        Maintained
 
 ARM/NOMADIK ARCHITECTURE
 M:     Alessandro Rubini <rubini@unipv.it>
+M:     Linus Walleij <linus.walleij@stericsson.com>
 M:     STEricsson <STEricsson_nomadik_linux@list.st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-nomadik/
 F:     arch/arm/plat-nomadik/
+F:     drivers/i2c/busses/i2c-nomadik.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
 ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT
 M:     Nelson Castillo <arhuaco@freaks-unidos.net>
@@ -998,12 +1001,24 @@ F:       drivers/i2c/busses/i2c-stu300.c
 F:     drivers/rtc/rtc-coh901331.c
 F:     drivers/watchdog/coh901327_wdt.c
 F:     drivers/dma/coh901318*
+F:     drivers/mfd/ab3100*
+F:     drivers/rtc/rtc-ab3100.c
+F:     drivers/rtc/rtc-coh901331.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
-ARM/U8500 ARM ARCHITECTURE
+ARM/Ux500 ARM ARCHITECTURE
 M:     Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
+M:     Linus Walleij <linus.walleij@stericsson.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-ux500/
+F:     drivers/dma/ste_dma40*
+F:     drivers/mfd/ab3550*
+F:     drivers/mfd/abx500*
+F:     drivers/mfd/ab8500*
+F:     drivers/mfd/stmpe*
+F:     drivers/rtc/rtc-ab8500.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
 ARM/VFP SUPPORT
 M:     Russell King <linux@arm.linux.org.uk>
@@ -2797,6 +2812,10 @@ M:       Thomas Gleixner <tglx@linutronix.de>
 S:     Maintained
 F:     Documentation/timers/
 F:     kernel/hrtimer.c
+F:     kernel/time/clockevents.c
+F:     kernel/time/tick*.*
+F:     kernel/time/timer_*.c
+F:     include/linux/clockevents.h
 F:     include/linux/hrtimer.h
 
 HIGH-SPEED SCC DRIVER FOR AX.25
@@ -4590,7 +4609,7 @@ F:        drivers/pcmcia/
 F:     include/pcmcia/
 
 PCNET32 NETWORK DRIVER
-M:     Don Fry <pcnet32@verizon.net>
+M:     Don Fry <pcnet32@frontier.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/pcnet32.c
@@ -4612,7 +4631,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:     Paul Mackerras <paulus@samba.org>
 M:     Ingo Molnar <mingo@elte.hu>
-M:     Arnaldo Carvalho de Melo <acme@redhat.com>
+M:     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:     Supported
 F:     kernel/perf_event*.c
 F:     include/linux/perf_event.h
@@ -5127,6 +5146,18 @@ L:       alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/s3c24xx
 
+TIMEKEEPING, NTP
+M:     John Stultz <johnstul@us.ibm.com>
+M:     Thomas Gleixner <tglx@linutronix.de>
+S:     Supported
+F:     include/linux/clocksource.h
+F:     include/linux/time.h
+F:     include/linux/timex.h
+F:     include/linux/timekeeping.h
+F:     kernel/time/clocksource.c
+F:     kernel/time/time*.c
+F:     kernel/time/ntp.c
+
 TLG2300 VIDEO4LINUX-2 DRIVER
 M:     Huang Shijie <shijie8@gmail.com>
 M:     Kang Yong <kangyong@telegent.com>
index 77044b7918a5cbe8439d9000175ca0b7f0ea4717..74b25559f831c5c48150789ed3230921635e0ae0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 37
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
index 8bf0fa652eb63c57dec1ebfec1a93a407be4ed32..f78c2be4242b437ced3308795952102bf1359763 100644 (file)
@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
 config HAVE_ARCH_JUMP_LABEL
        bool
 
+config HAVE_ARCH_MUTEX_CPU_RELAX
+       bool
+
 source "kernel/gcov/Kconfig"
index fe792ca818f64c4d9954e8b62e5f8064a5d5777e..5996e7a6757e4e058d4d7e7f9d10f76fbf6b6d20 100644 (file)
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void)    { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
index 5f77afb88e898b849f57e5c115f4688282e71c62..4c8bb374eb0a288d03d2cf5ad124e914638527f2 100644 (file)
@@ -112,8 +112,6 @@ init_IRQ(void)
        wrent(entInt, 0);
 
        alpha_mv.init_irq();
-
-       init_hw_perf_events();
 }
 
 /*
index 1cc49683fb69b2a5f96639e71a2f1af821479e77..90561c45e7d8928e8e137e33164d2d2d661a28b8 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_cpu()) {
                pr_cont("No support for your CPU.\n");
-               return;
+               return 0;
        }
 
        pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
        /* And set up PMU specification */
        alpha_pmu = &ev67_pmu;
 
-       perf_pmu_register(&pmu);
-}
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+       return 0;
+}
+early_initcall(init_hw_perf_events);
index 1bec96e851967101df7a796745b84d24bd320ab8..42ff90b46dfb1f1e0e91fdfe370f493e0b81551f 100644 (file)
@@ -352,3 +352,4 @@ struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys)
        return pci_scan_bus(nr, &it8152_ops, sys);
 }
 
+EXPORT_SYMBOL(dma_set_coherent_mask);
index 21fa272301f804b9bad3218b74147f6e450fa026..b2f95c72287c861dc03c09e110242982bb51a5c5 100644 (file)
@@ -76,6 +76,7 @@ extern unsigned long it8152_base_address;
   IT8152_PD_IRQ(0)  Audio controller (ACR)
  */
 #define IT8152_IRQ(x)   (IRQ_BOARD_START + (x))
+#define IT8152_LAST_IRQ        (IRQ_BOARD_START + 40)
 
 /* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */
 #define IT8152_LD_IRQ_COUNT     9
index 1fc684e70ab6a6c9915ad5fbd302066ccb050a29..7080e2c8fa6209e0445b59a7b33ba8bd825797af 100644 (file)
@@ -25,9 +25,6 @@ extern void *kmap_high(struct page *page);
 extern void *kmap_high_get(struct page *page);
 extern void kunmap_high(struct page *page);
 
-extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte);
-extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte);
-
 /*
  * The following functions are already defined by <linux/highmem.h>
  * when CONFIG_HIGHMEM is not set.
index 4fc1565e4f930860722bca331330d0606e494d1c..316bb2b2be3dd7ca64638ea2d13d502e66ee5fde 100644 (file)
@@ -13,9 +13,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-/* DO NOT EDIT!! - this file automatically generated
- *                 from .s file by awk -f s2h.awk
- */
 /*  Size definitions
  *  Copyright (C) ARM Limited 1998. All rights reserved.
  */
@@ -25,6 +22,9 @@
 
 /* handy sizes */
 #define SZ_16                          0x00000010
+#define SZ_32                          0x00000020
+#define SZ_64                          0x00000040
+#define SZ_128                         0x00000080
 #define SZ_256                         0x00000100
 #define SZ_512                         0x00000200
 
index 1120f18a6b17695e48c37a6b4d1d7e50d306e478..80025948b8ad378e29d162030e962bb1dd462255 100644 (file)
@@ -150,6 +150,7 @@ extern unsigned int user_debug;
 #define rmb()          dmb()
 #define wmb()          mb()
 #else
+#include <asm/memory.h>
 #define mb()   do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define rmb()  do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define wmb()  do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
index 8bfa98757cd2f3fc9ef128011944a55fa7838ffd..80bf8cd88d7c522c319354c75b13d5484b9ede96 100644 (file)
@@ -29,6 +29,9 @@ ret_fast_syscall:
        ldr     r1, [tsk, #TI_FLAGS]
        tst     r1, #_TIF_WORK_MASK
        bne     fast_work_pending
+#if defined(CONFIG_IRQSOFF_TRACER)
+       asm_trace_hardirqs_on
+#endif
 
        /* perform architecture specific actions before user return */
        arch_ret_to_user r1, lr
@@ -65,6 +68,9 @@ ret_slow_syscall:
        tst     r1, #_TIF_WORK_MASK
        bne     work_pending
 no_work_pending:
+#if defined(CONFIG_IRQSOFF_TRACER)
+       asm_trace_hardirqs_on
+#endif
        /* perform architecture specific actions before user return */
        arch_ret_to_user r1, lr
 
index 07a50357492ac6858bc21d0b7913aefc93cbb1cf..fdfa4976b0bfeca637178609bb5e8d51e63ac672 100644 (file)
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
                pr_info("no hardware support available\n");
        }
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
index 8c1959590252e7161f1da38497eddba9b0538afb..9066473c0ebc3991a2f793e8c78616e26b31decb 100644 (file)
@@ -310,7 +310,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
-       atomic_inc(&mm->mm_users);
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
index 57f8ee154943d91b50882a7b01ab2d602d5ba631..27ac6f550fe36eb686e8c180a727c8bf29d2c286 100644 (file)
@@ -74,6 +74,8 @@
 #define                        AT91_MCI_TRTYP_BLOCK    (0 << 19)
 #define                        AT91_MCI_TRTYP_MULTIPLE (1 << 19)
 #define                        AT91_MCI_TRTYP_STREAM   (2 << 19)
+#define                        AT91_MCI_TRTYP_SDIO_BYTE        (4 << 19)
+#define                        AT91_MCI_TRTYP_SDIO_BLOCK       (5 << 19)
 
 #define AT91_MCI_BLKR          0x18            /* Block Register */
 #define                AT91_MCI_BLKR_BCNT(n)   ((0xffff & (n)) << 0)   /* Block count */
index 24498a932ba65b8054de5ba8229ef0e0c838dd71..a54b3db8036678c8c385705b7c22b763769bb515 100644 (file)
@@ -513,4 +513,4 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 
 EXPORT_SYMBOL(ixp4xx_pci_read);
 EXPORT_SYMBOL(ixp4xx_pci_write);
-
+EXPORT_SYMBOL(dma_set_coherent_mask);
index dd235ecc9d6c5946d6610bb7adef42cb57476210..c93e73d54dd1e7c5cebf41b13bc31b37a600391d 100644 (file)
@@ -540,6 +540,7 @@ config MACH_ICONTROL
 config ARCH_PXA_ESERIES
        bool "PXA based Toshiba e-series PDAs"
        select PXA25x
+       select FB_W100
 
 config MACH_E330
        bool "Toshiba e330"
index 52c30b01a67139e88fdb71af7dd7371525167bfd..ae008110db4edd934716e7b0a07d26aab1d84813 100644 (file)
@@ -353,8 +353,8 @@ resume_turn_on_mmu:
 
        @ Let us ensure we jump to resume_after_mmu only when the mcr above
        @ actually took effect.  They call it the "cpwait" operation.
-       mrc     p15, 0, r1, c2, c0, 0           @ queue a dependency on CP15
-       sub     pc, r2, r1, lsr #32             @ jump to virtual addr
+       mrc     p15, 0, r0, c2, c0, 0           @ queue a dependency on CP15
+       sub     pc, r2, r0, lsr #32             @ jump to virtual addr
        nop
        nop
        nop
index 6e77c042d8e9417ad5b9141c6eab37767693e3ed..e0b0e7a4ec68a3c577959e9116b9c7fa5d0be09d 100644 (file)
  */
 
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
 #include <plat/cache-feroceon-l2.h>
-#include "mm.h"
 
 /*
  * Low-level cache maintenance operations.
  * between which we don't want to be preempted.
  */
 
-static inline unsigned long l2_start_va(unsigned long paddr)
+static inline unsigned long l2_get_va(unsigned long paddr)
 {
 #ifdef CONFIG_HIGHMEM
        /*
-        * Let's do our own fixmap stuff in a minimal way here.
         * Because range ops can't be done on physical addresses,
         * we simply install a virtual mapping for it only for the
         * TLB lookup to occur, hence no need to flush the untouched
-        * memory mapping.  This is protected with the disabling of
-        * interrupts by the caller.
+        * memory mapping afterwards (note: a cache flush may happen
+        * in some circumstances depending on the path taken in kunmap_atomic).
         */
-       unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-       unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
-       local_flush_tlb_kernel_page(vaddr);
-       return vaddr + (paddr & ~PAGE_MASK);
+       void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+       return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
 #else
        return __phys_to_virt(paddr);
 #endif
 }
 
+static inline void l2_put_va(unsigned long vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+       kunmap_atomic((void *)vaddr);
+#endif
+}
+
 static inline void l2_clean_pa(unsigned long addr)
 {
        __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
@@ -76,13 +75,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
         */
        BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-       raw_local_irq_save(flags);
-       va_start = l2_start_va(start);
+       va_start = l2_get_va(start);
        va_end = va_start + (end - start);
+       raw_local_irq_save(flags);
        __asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
                "mcr p15, 1, %1, c15, c9, 5"
                : : "r" (va_start), "r" (va_end));
        raw_local_irq_restore(flags);
+       l2_put_va(va_start);
 }
 
 static inline void l2_clean_inv_pa(unsigned long addr)
@@ -106,13 +106,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
         */
        BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-       raw_local_irq_save(flags);
-       va_start = l2_start_va(start);
+       va_start = l2_get_va(start);
        va_end = va_start + (end - start);
+       raw_local_irq_save(flags);
        __asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
                "mcr p15, 1, %1, c15, c11, 5"
                : : "r" (va_start), "r" (va_end));
        raw_local_irq_restore(flags);
+       l2_put_va(va_start);
 }
 
 static inline void l2_inv_all(void)
index c3154928bccdf9750bef6da4e4fc29c2aad732f5..5a32020471e3bab2fc1e966a6e968c17a87f1d37 100644 (file)
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/system.h>
 #include <asm/cputype.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include "mm.h"
 
 #define CR_L2  (1 << 26)
 
@@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void)
        dsb();
 }
 
+static inline void l2_unmap_va(unsigned long va)
+{
 #ifdef CONFIG_HIGHMEM
-#define l2_map_save_flags(x)           raw_local_save_flags(x)
-#define l2_map_restore_flags(x)                raw_local_irq_restore(x)
-#else
-#define l2_map_save_flags(x)           ((x) = 0)
-#define l2_map_restore_flags(x)                ((void)(x))
+       if (va != -1)
+               kunmap_atomic((void *)va);
 #endif
+}
 
-static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
-                                     unsigned long flags)
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
 {
 #ifdef CONFIG_HIGHMEM
        unsigned long va = prev_va & PAGE_MASK;
@@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
                /*
                 * Switching to a new page.  Because cache ops are
                 * using virtual addresses only, we must put a mapping
-                * in place for it.  We also enable interrupts for a
-                * short while and disable them again to protect this
-                * mapping.
+                * in place for it.
                 */
-               unsigned long idx;
-               raw_local_irq_restore(flags);
-               idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-               va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-               raw_local_irq_restore(flags | PSR_I_BIT);
-               set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
-               local_flush_tlb_kernel_page(va);
+               l2_unmap_va(prev_va);
+               va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
        }
        return va + (pa_offset >> (32 - PAGE_SHIFT));
 #else
@@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
 
 static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        if (start == 0 && end == -1ul) {
                xsc3_l2_inv_all();
@@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
        }
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        /*
         * Clean and invalidate partial first cache line.
         */
        if (start & (CACHE_LINE_SIZE - 1)) {
-               vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+               vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
                start = (start | (CACHE_LINE_SIZE - 1)) + 1;
@@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
         * Invalidate all full cache lines between 'start' and 'end'.
         */
        while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_inv_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
@@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
         * Clean and invalidate partial last cache line.
         */
        if (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
 
 static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        start &= ~(CACHE_LINE_SIZE - 1);
        while (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
@@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void)
 
 static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        if (start == 0 && end == -1ul) {
                xsc3_l2_flush_all();
@@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
        }
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        start &= ~(CACHE_LINE_SIZE - 1);
        while (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
index ac6a36142fcd5a28084b3c669fac9e800fd65497..809f1bf9fa29ef0fda15cf7563cffeed7a5bb98d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
@@ -480,10 +481,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
                                op(vaddr, len, dir);
                                kunmap_high(page);
                        } else if (cache_is_vipt()) {
-                               pte_t saved_pte;
-                               vaddr = kmap_high_l1_vipt(page, &saved_pte);
+                               /* unmapped pages might still be cached */
+                               vaddr = kmap_atomic(page);
                                op(vaddr + offset, len, dir);
-                               kunmap_high_l1_vipt(page, saved_pte);
+                               kunmap_atomic(vaddr);
                        }
                } else {
                        vaddr = page_address(page) + offset;
index 391ffae750986404df8658d53e93fdc69b970ba4..c29f2839f1d2b72c8aa99e17db44e120a42bc7ee 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/highmem.h>
 
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
@@ -180,10 +181,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
                        kunmap_high(page);
                } else if (cache_is_vipt()) {
-                       pte_t saved_pte;
-                       addr = kmap_high_l1_vipt(page, &saved_pte);
+                       /* unmapped pages might still be cached */
+                       addr = kmap_atomic(page);
                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
-                       kunmap_high_l1_vipt(page, saved_pte);
+                       kunmap_atomic(addr);
                }
        }
 
index c435fd9e1da95c9fdc9d7fab83b3a42caef1b905..807c0573abbe82533a884f87ea7ea243051a228d 100644 (file)
@@ -140,90 +140,3 @@ struct page *kmap_atomic_to_page(const void *ptr)
        pte = TOP_PTE(vaddr);
        return pte_page(*pte);
 }
-
-#ifdef CONFIG_CPU_CACHE_VIPT
-
-#include <linux/percpu.h>
-
-/*
- * The VIVT cache of a highmem page is always flushed before the page
- * is unmapped. Hence unmapped highmem pages need no cache maintenance
- * in that case.
- *
- * However unmapped pages may still be cached with a VIPT cache, and
- * it is not possible to perform cache maintenance on them using physical
- * addresses unfortunately.  So we have no choice but to set up a temporary
- * virtual mapping for that purpose.
- *
- * Yet this VIPT cache maintenance may be triggered from DMA support
- * functions which are possibly called from interrupt context. As we don't
- * want to keep interrupt disabled all the time when such maintenance is
- * taking place, we therefore allow for some reentrancy by preserving and
- * restoring the previous fixmap entry before the interrupted context is
- * resumed.  If the reentrancy depth is 0 then there is no need to restore
- * the previous fixmap, and leaving the current one in place allow it to
- * be reused the next time without a TLB flush (common with DMA).
- */
-
-static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
-
-void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
-{
-       unsigned int idx, cpu;
-       int *depth;
-       unsigned long vaddr, flags;
-       pte_t pte, *ptep;
-
-       if (!in_interrupt())
-               preempt_disable();
-
-       cpu = smp_processor_id();
-       depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-
-       idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       ptep = TOP_PTE(vaddr);
-       pte = mk_pte(page, kmap_prot);
-
-       raw_local_irq_save(flags);
-       (*depth)++;
-       if (pte_val(*ptep) == pte_val(pte)) {
-               *saved_pte = pte;
-       } else {
-               *saved_pte = *ptep;
-               set_pte_ext(ptep, pte, 0);
-               local_flush_tlb_kernel_page(vaddr);
-       }
-       raw_local_irq_restore(flags);
-
-       return (void *)vaddr;
-}
-
-void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
-{
-       unsigned int idx, cpu = smp_processor_id();
-       int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-       unsigned long vaddr, flags;
-       pte_t pte, *ptep;
-
-       idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       ptep = TOP_PTE(vaddr);
-       pte = mk_pte(page, kmap_prot);
-
-       BUG_ON(pte_val(*ptep) != pte_val(pte));
-       BUG_ON(*depth <= 0);
-
-       raw_local_irq_save(flags);
-       (*depth)--;
-       if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
-               set_pte_ext(ptep, saved_pte, 0);
-               local_flush_tlb_kernel_page(vaddr);
-       }
-       raw_local_irq_restore(flags);
-
-       if (!in_interrupt())
-               preempt_enable();
-}
-
-#endif  /* CONFIG_CPU_CACHE_VIPT */
index 5c7c6fc07565bd468a9f78b327073f0212795a42..183e0d226669193700c72f405f1e4e43303612a4 100644 (file)
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
index c2e44597c22b1fc5b69ebf13cca307d43c2d1a52..ac11754ecec544c965c196f9dc857b2b398b22e0 100644 (file)
@@ -459,7 +459,7 @@ void migrate_irqs(void)
                        tmp = CROSS_GxICR(irq, new);
 
                        x &= GxICR_LEVEL | GxICR_ENABLE;
-                       if (GxICR(irq) & GxICR_REQUEST) {
+                       if (GxICR(irq) & GxICR_REQUEST)
                                x |= GxICR_REQUEST | GxICR_DETECT;
                        CROSS_GxICR(irq, new) = x;
                        tmp = CROSS_GxICR(irq, new);
index 7c07de0d89436ea4bac7350de09ab73d5f89b2e4..b150b510510f167d2782f645999303dfa297e2ad 100644 (file)
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
        return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
index 09d72028f317755428865f5105ee6b76b829ad7a..2cc5e0301d0b532a2291e400cb7bdc0a87aa89cd 100644 (file)
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
        return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
index 3129c855933c2a3857b0c4b3321b259b851279b8..5674807057899cd09dc366d128b1769afc6cd3e9 100644 (file)
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
                freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-       perf_pmu_register(&power_pmu);
+       perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(power_pmu_notifier);
 
        return 0;
index 7ecca59ddf77fe20bd46b470d9392cdd16fd5ba9..4dcf5f831e9d01f8694443ac7d56146b6f7a9777 100644 (file)
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
        pr_info("%s performance monitor hardware support registered\n",
                pmu->name);
 
-       perf_pmu_register(&fsl_emb_pmu);
+       perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
index 2a361cdda635881ba4e96bf33f0a3dd6b8c273ea..ead8b3c2649ebba98c00423727e7dae54c6f5a7f 100644 (file)
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
        return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
index 199de527d411446918651bd374bc5e9586ace46f..eca0ac595cb6c5b790ea4fd37d77ebb3a8ee474b 100644 (file)
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
        return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
index 98b6a729a9dd127cc2c88e799b58bdbea2fa313c..d5ff0f64a5e645e01ddc6f9b56c91c60e14b204a 100644 (file)
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
        return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
index 84a607bda8fbc129562943d7d2ce7fe0ee0f11bd..31603927e376e7e8854bf6a0faf86bc2bc9e56f7 100644 (file)
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
        return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
index 852f7b7f6b4045801df807b997c7b110426ce7a6..593740fcb799d6fc9c29faca49425ad97b15b19a 100644 (file)
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
        return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
index 3fee685de4df49e01a3a85ff069f3d409354c924..9a6e093858fe13fd30d2a79adb82e38e8c424b84 100644 (file)
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
        return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
index fea833e18ad58ab92c78da66e35e2f23dcacd8ff..e0d703c7fdf7cc3e7d1491a963db90ad4735b9e4 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/of_gpio.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/fs.h>
 #include <linux/watchdog.h>
 #include <linux/miscdevice.h>
 #include <linux/uaccess.h>
index e0b98e71ff4797e5807f9e5d99f674aeb90456b8..6c6d7b339aae4f49fac84b7b3edc069f4ccf3adf 100644 (file)
@@ -99,6 +99,7 @@ config S390
        select HAVE_KERNEL_LZMA
        select HAVE_KERNEL_LZO
        select HAVE_GET_USER_PAGES_FAST
+       select HAVE_ARCH_MUTEX_CPU_RELAX
        select ARCH_INLINE_SPIN_TRYLOCK
        select ARCH_INLINE_SPIN_TRYLOCK_BH
        select ARCH_INLINE_SPIN_LOCK
index 458c1f7fbc1808d48982aa0c5fe89bfe3df2098c..688271f5f2e452b9951599550f33ed0ddcfe0a7c 100644 (file)
@@ -7,3 +7,5 @@
  */
 
 #include <asm-generic/mutex-dec.h>
+
+#define arch_mutex_cpu_relax() barrier()
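
The generic side of this hook is not shown in this hunk; presumably the
common mutex code falls back to cpu_relax() when an architecture does not
select HAVE_ARCH_MUTEX_CPU_RELAX, along these lines:

    /* sketch of the expected generic fallback, assuming this Kconfig name */
    #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
    #define arch_mutex_cpu_relax() cpu_relax()
    #endif
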
index d961949600fd462199f3d9706e86371e815e7b1b..9070d7e607047fccec2d51b865dfe89307cd5d41 100644 (file)
@@ -140,7 +140,7 @@ void __init init_se7206_IRQ(void)
        make_se7206_irq(IRQ1_IRQ); /* ATA */
        make_se7206_irq(IRQ3_IRQ); /* SLOT / PCM */
 
-       __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR); /* ICR1 */
+       __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR1); /* ICR1 */
 
        /* FPGA System register setup*/
        __raw_writew(0x0000,INTSTS0); /* Clear INTSTS0 */
index b26264dc2aefef75e0292cb6c9a4862e8b4f9785..c509c40cba4bfefe3917f5eec9b2ddea3c63763d 100644 (file)
@@ -34,7 +34,7 @@ static const int pfc_divisors[]={1,2,3,4,6,8,12};
 
 static void master_clk_init(struct clk *clk)
 {
-       return 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007];
+       clk->rate = 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7201_master_clk_ops = {
index b601fa3978d1995f53ea466e6aed5145e6ef1f75..6282a839e08e7831c40403c8a8c576ab97242df4 100644 (file)
@@ -81,8 +81,7 @@ static void shoc_clk_init(struct clk *clk)
        for (i = 0; i < ARRAY_SIZE(frqcr3_divisors); i++) {
                int divisor = frqcr3_divisors[i];
 
-               if (clk->ops->set_rate(clk, clk->parent->rate /
-                                               divisor, 0) == 0)
+               if (clk->ops->set_rate(clk, clk->parent->rate / divisor) == 0)
                        break;
        }
 
index dbf3b4bb71febb0ba38e9ec2f6c27731ea0970be..748955df018d801db05137f1831cf18f01938b55 100644 (file)
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
        return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
index 580276525731531643c9165d5c45ce28f5eade20..17e6bebfede067c26379efde6b8e0a69fc6565e9 100644 (file)
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
        return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
index 5a4b33435650c8ea108668d8e7e30786a20bd335..2ee21a47b5af6e1aac2889712c69848b7194f317 100644 (file)
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
        WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(sh_pmu_notifier);
        return 0;
 }
index 6e8bfa1786dab1f45d3dff5a1dcacc31a08d4844..4d3dbe3703e9001f53f65f64d38629bda6dedb63 100644 (file)
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)          \
 do {                                                   \
        unsigned long _pstate, _asi, _pil, _i7, _fp;    \
@@ -26,8 +24,6 @@ do {                                                  \
        (regs)->u_regs[UREG_I6] = _fp;                  \
        (regs)->u_regs[UREG_I7] = _i7;                  \
 } while (0)
-#else
-static inline void init_hw_perf_events(void)   { }
 #endif
 
 #endif
index a4bd7ba74c89d9f25221f29e616f49f26517aad5..300f810142f57e82cce5984a56d9e5e920aebb7d 100644 (file)
@@ -270,8 +270,6 @@ int __init nmi_init(void)
                        atomic_set(&nmi_active, -1);
                }
        }
-       if (!err)
-               init_hw_perf_events();
 
        return err;
 }
index 0d6deb55a2ae7e4189b5ab60aec81cd8df28adb6..760578687e7ca86cb0bb63cc7dc68721d51bee90 100644 (file)
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
        return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-               return;
+               return 0;
        }
 
        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        register_die_notifier(&perf_event_nmi_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                           struct pt_regs *regs)
index e330da21b84f0636751b7e18e921ecff9cc31f55..b6fccb07123e206e23a4b73534d4891f66753b90 100644 (file)
@@ -377,6 +377,18 @@ config X86_ELAN
 
          If unsure, choose "PC-compatible" instead.
 
+config X86_INTEL_CE
+       bool "CE4100 TV platform"
+       depends on PCI
+       depends on PCI_GODIRECT
+       depends on X86_32
+       depends on X86_EXTENDED_PLATFORM
+       select X86_REBOOTFIXUPS
+       ---help---
+         Select for the Intel CE media processor (CE4100) SOC.
+         This option compiles in support for the CE4100 SOC for set-top
+         boxes and media devices.
+
 config X86_MRST
        bool "Moorestown MID platform"
        depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
        depends on X86_EXTENDED_PLATFORM
        depends on X86_IO_APIC
        select APB_TIMER
+       select I2C
+       select SPI
+       select INTEL_SCU_IPC
+       select X86_PLATFORM_DEVICES
        ---help---
          Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
          Internet Device(MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
          Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
          supposed to run on an IA32-based Unisys ES7000 system.
 
+config X86_32_IRIS
+       tristate "Eurobraille/Iris poweroff module"
+       depends on X86_32
+       ---help---
+         The Iris machines from EuroBraille do not have APM or ACPI support
+         to shut themselves down properly.  A special I/O sequence is
+         needed to do so, which is what this module does at
+         kernel shutdown.
+
+         This is only for Iris machines from EuroBraille.
+
+         If unused, say N.
+
 config SCHED_OMIT_FRAME_POINTER
        def_bool y
        prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
        depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
-config K8_NUMA
+config AMD_NUMA
        def_bool y
        prompt "Old style AMD Opteron NUMA detection"
        depends on X86_64 && NUMA && PCI
        ---help---
-         Enable K8 NUMA node topology detection.  You should say Y here if
-         you have a multi processor AMD K8 system. This uses an old
-         method to read the NUMA configuration directly from the builtin
-         Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
-         instead, which also takes priority if both are compiled in.
+         Enable AMD NUMA node topology detection.  You should say Y here if
+         you have a multi processor AMD system. This uses an old method to
+         read the NUMA configuration directly from the builtin Northbridge
+         of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
+         which also takes priority if both are compiled in.
 
 config X86_64_ACPI_NUMA
        def_bool y
index b59ee765414ea3891d6d4914485ba366fcee5663..45143bbcfe5e487d53e33bddaa7193ab68a5275e 100644 (file)
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
          feature as well as for the change_page_attr() infrastructure.
          If in doubt, say "N"
 
+config DEBUG_SET_MODULE_RONX
+       bool "Set loadable kernel module data as NX and text as RO"
+       depends on MODULES
+       ---help---
+         This option helps catch unintended modifications to a loadable
+         kernel module's text and read-only data. It also prevents execution
+         of module data. Such protection may interfere with run-time code
+         patching and dynamic kernel tracing - but it may also protect
+         against certain classes of kernel exploits.
+         If in doubt, say "N".
+
 config DEBUG_NX_TEST
        tristate "Testcase for the NX non-executable stack feature"
        depends on DEBUG_KERNEL && m
index 52f85a196fa033df961d20349ce6b7437409e843..35af09d13dc13b5d41ec7e19e066c7b5b676f30a 100644 (file)
@@ -182,7 +182,7 @@ no_longmode:
        hlt
        jmp     1b
 
-#include "../../kernel/verify_cpu_64.S"
+#include "../../kernel/verify_cpu.S"
 
        /*
         * Be careful here startup_64 needs to be at a predictable
index 76561d20ea2f27f0edfd0eee6d043b98c6aa6e90..13009d1af99a33e2bbee39fbe80a194fac85ece4 100644 (file)
@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
 extern int alternatives_text_reserved(void *start, void *end);
+extern bool skip_smp_alternatives;
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
                                               void *locks, void *locks_end,
@@ -180,8 +181,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+       void *addr;
+       const void *opcode;
+       size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
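
For illustration, a hedged sketch (not from the patch) of how a caller with
several patch sites might use the new batched interface; addr1/addr2,
opcode1/opcode2 and len1/len2 are placeholder values:

    struct text_poke_param params[] = {
        { .addr = addr1, .opcode = opcode1, .len = len1 },
        { .addr = addr2, .opcode = opcode2, .len = len2 },
    };

    /* one synchronization for both sites instead of one per site */
    text_poke_smp_batch(params, ARRAY_SIZE(params));
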
index c8517f81b21e73f9f2c428a26f2fb8995f73011f..6aee50d655d12f6792e495c5d5d004aaef9377e2 100644 (file)
@@ -3,36 +3,53 @@
 
 #include <linux/pci.h>
 
-extern struct pci_device_id k8_nb_ids[];
+extern struct pci_device_id amd_nb_misc_ids[];
 struct bootnode;
 
-extern int early_is_k8_nb(u32 value);
-extern int cache_k8_northbridges(void);
-extern void k8_flush_garts(void);
-extern int k8_get_nodes(struct bootnode *nodes);
-extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int k8_scan_nodes(void);
+extern int early_is_amd_nb(u32 value);
+extern int amd_cache_northbridges(void);
+extern void amd_flush_garts(void);
+extern int amd_get_nodes(struct bootnode *nodes);
+extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
+extern int amd_scan_nodes(void);
 
-struct k8_northbridge_info {
+struct amd_northbridge {
+       struct pci_dev *misc;
+};
+
+struct amd_northbridge_info {
        u16 num;
-       u8 gart_supported;
-       struct pci_dev **nb_misc;
+       u64 flags;
+       struct amd_northbridge *nb;
 };
-extern struct k8_northbridge_info k8_northbridges;
+extern struct amd_northbridge_info amd_northbridges;
+
+#define AMD_NB_GART                    0x1
+#define AMD_NB_L3_INDEX_DISABLE                0x2
 
 #ifdef CONFIG_AMD_NB
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline int amd_nb_num(void)
 {
-       return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
+       return amd_northbridges.num;
 }
 
-#else
+static inline int amd_nb_has_feature(int feature)
+{
+       return ((amd_northbridges.flags & feature) == feature);
+}
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline struct amd_northbridge *node_to_amd_nb(int node)
 {
-       return NULL;
+       return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
+
+#else
+
+#define amd_nb_num(x)          0
+#define amd_nb_has_feature(x)  false
+#define node_to_amd_nb(x)      NULL
+
 #endif
 
 
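For illustration, a hedged sketch (not from the patch) of the renamed
interface in use, walking each detected northbridge and checking a feature
flag:

    int node;

    if (amd_nb_has_feature(AMD_NB_GART))
        amd_flush_garts();

    for (node = 0; node < amd_nb_num(); node++) {
        struct amd_northbridge *nb = node_to_amd_nb(node);

        if (nb && nb->misc)
            dev_info(&nb->misc->dev, "AMD northbridge %d\n", node);
    }
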
index f6ce0bda3b98a74906cb1c8297f4ba150430699d..cf12007796db95f1a48635a55016ff01a0f811c2 100644 (file)
@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
 extern void enable_NMI_through_LVT0(void);
+extern int apic_force_enable(void);
 
 /*
  * On 32bit this is mach-xxx local
index a859ca461fb0432585f952e08337610b1165a204..47a30ff8e51782a31f146c78b458ea6a89bcacc3 100644 (file)
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
 #else
 # define MAX_IO_APICS 128
 # define MAX_LOCAL_APIC 32768
index 8e6218550e774b56fd30f3171bc163bfb511b947..c8bfe63a06de289057321af73c114e3521d9088a 100644 (file)
@@ -124,6 +124,7 @@ enum {
        X86_SUBARCH_LGUEST,
        X86_SUBARCH_XEN,
        X86_SUBARCH_MRST,
+       X86_SUBARCH_CE4100,
        X86_NR_SUBARCHS,
 };
 
index 9479a037419fe1358a96cece0d877a269c71e365..0141b234406fb01f8418320ea49c997fdbd14cbb 100644 (file)
@@ -117,6 +117,10 @@ enum fixed_addresses {
        FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
        FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
        __end_of_permanent_fixed_addresses,
+
+#ifdef CONFIG_X86_MRST
+       FIX_LNW_VRTC,
+#endif
        /*
         * 256 temporary boot-time mappings, used by early_ioremap(),
         * before ioremap() is functional.
index 4aa2bb3b242ab76733e0f7e5ba95454471297c1a..ef328901c80240f4a1471d3e4bdd795daffc6621 100644 (file)
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
        int err;
 
        /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+       asm volatile("1:  fxrstorq %[fx]\n\t"
+                    "2:\n"
+                    ".section .fixup,\"ax\"\n"
+                    "3:  movl $-1,%[err]\n"
+                    "    jmp  2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [err] "=r" (err)
+                    : [fx] "m" (*fx), "0" (0));
+#else
        asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err)
                     : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
        return err;
 }
 
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                return -EFAULT;
 
        /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+       asm volatile("1:  fxsaveq %[fx]\n\t"
+                    "2:\n"
+                    ".section .fixup,\"ax\"\n"
+                    "3:  movl $-1,%[err]\n"
+                    "    jmp  2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [err] "=r" (err), [fx] "=m" (*fx)
+                    : "0" (0));
+#else
        asm volatile("1:  rex64/fxsave (%[fx])\n\t"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err), "=m" (*fx)
                     : [fx] "R" (fx), "0" (0));
+#endif
        if (unlikely(err) &&
            __clear_user(fx, sizeof(struct i387_fxsave_struct)))
                err = -EFAULT;
index a6b28d017c2fb9aae9e5f5d2bae03db4dcadbd43..0c5ca4e30d7bda949a3470ad6c89f391d19ac623 100644 (file)
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
                 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_init_mappings(void);
+extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 
-extern void probe_nr_irqs_gsi(void);
 extern int get_nr_irqs_gsi(void);
-
 extern void setup_ioapic_ids_from_mpc(void);
+extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
 struct mp_ioapic_gsi{
        u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void)  { }
+static inline void ioapic_and_gsi_init(void) { }
 static inline void ioapic_insert_resources(void) { }
-static inline void probe_nr_irqs_gsi(void)     { }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 
index 13b0ebaa512f77764e06956632b32755f8ca2bfa..ba870bb6dd8ef30ab81a317a8eb43dcb83066630 100644 (file)
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
        return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
index 5bdfca86581beb3b45c60fd1f8d900a5daa68bf9..f23eb2528464f4a51ad6d14d70db0f7c33d92e0a 100644 (file)
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-                      unsigned long *sp, unsigned long bp);
+                      unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
index c62c13cb9788f0a1ea664fed073ebbca1ed02f15..eb16e94ae04f79927eb2c5afbf7b6ecd849b1621 100644 (file)
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
 #ifdef CONFIG_X86_THERMAL_VECTOR
 extern void mcheck_intel_therm_init(void);
 #else
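
For illustration, a hedged sketch (not from the patch) of a platform driver
hooking the new thermal-threshold callback; my_thermal_event() and
my_thermal_init() are hypothetical:

    static int my_thermal_event(__u64 msr_val)
    {
        pr_info("core thermal threshold crossed, MSR value 0x%llx\n",
                (unsigned long long)msr_val);
        return 0;
    }

    static int __init my_thermal_init(void)
    {
        platform_thermal_notify = my_thermal_event;
        return 0;
    }
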
index ef51b501e22a6e53bf4ae7e2d9e2566760f72ee1..24215072d0e1e5894d4643634bfe1ef9786eef55 100644 (file)
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
+
+static inline void get_ucode_data(void *to, const u8 *from, size_t n)
+{
+       memcpy(to, from, n);
+}
+
 #else
 static inline struct microcode_ops * __init init_amd_microcode(void)
 {
index c82868e9f905f04779778542298ce5560ae2e865..0c90dd9f05053c83591df6e04ec5d7979fee779f 100644 (file)
@@ -5,8 +5,9 @@
 
 #include <asm/mpspec_def.h>
 #include <asm/x86_init.h>
+#include <asm/apicdef.h>
 
-extern int apic_version[MAX_APICS];
+extern int apic_version[];
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
                                 int active_high_low);
 #endif /* CONFIG_ACPI */
 
-#define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_APICS)
+#define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_LOCAL_APIC)
 
 struct physid_mask {
        unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
        test_and_set_bit(physid, (map).mask)
 
 #define physids_and(dst, src1, src2)                                   \
-       bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+       bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_or(dst, src1, src2)                                    \
-       bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+       bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_clear(map)                                     \
-       bitmap_zero((map).mask, MAX_APICS)
+       bitmap_zero((map).mask, MAX_LOCAL_APIC)
 
 #define physids_complement(dst, src)                           \
-       bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+       bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
 
 #define physids_empty(map)                                     \
-       bitmap_empty((map).mask, MAX_APICS)
+       bitmap_empty((map).mask, MAX_LOCAL_APIC)
 
 #define physids_equal(map1, map2)                              \
-       bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+       bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
 
 #define physids_weight(map)                                    \
-       bitmap_weight((map).mask, MAX_APICS)
+       bitmap_weight((map).mask, MAX_LOCAL_APIC)
 
 #define physids_shift_right(d, s, n)                           \
-       bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+       bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 #define physids_shift_left(d, s, n)                            \
-       bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+       bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 static inline unsigned long physids_coerce(physid_mask_t *map)
 {
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
        map->mask[0] = physids;
 }
 
-/* Note: will create very large stack frames if physid_mask_t is big */
-#define physid_mask_of_physid(physid)                                  \
-       ({                                                              \
-               physid_mask_t __physid_mask = PHYSID_MASK_NONE;         \
-               physid_set(physid, __physid_mask);                      \
-               __physid_mask;                                          \
-       })
-
 static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
 {
        physids_clear(*map);
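
The io_apic.c hunk later in this commit makes exactly this substitution; the caller-side pattern changes from returning a mask by value to filling one in place:

    /* Old: built and returned a whole physid_mask_t on the stack. */
    phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);

    /* New: writes into the caller's mask. */
    physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
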
index 4a7f96d7c188edd92387cdec4a3be36e23028f35..c0a955a9a08784f662a071d976ef57bc0637c8c6 100644 (file)
 
 #ifdef CONFIG_X86_32
 # define MAX_MPC_ENTRY 1024
-# define MAX_APICS      256
-#else
-# if NR_CPUS <= 255
-#  define MAX_APICS     255
-# else
-#  define MAX_APICS   32768
-# endif
 #endif
 
 /* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644 (file)
index 0000000..73668ab
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _MRST_VRTC_H
+#define _MRST_VRTC_H
+
+extern unsigned char vrtc_cmos_read(unsigned char reg);
+extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
+extern unsigned long vrtc_get_time(void);
+extern int vrtc_set_mmss(unsigned long nowtime);
+
+#endif
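
A sketch of the intended use of these helpers; RTC_SECONDS comes from <linux/mc146818rtc.h> and the variable names are illustrative:

    unsigned char sec = vrtc_cmos_read(RTC_SECONDS);
    unsigned long now = vrtc_get_time();    /* wall time in seconds */

    vrtc_set_mmss(now);                     /* write minutes/seconds back */
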
index 4a711a684b174435bd5aae838515a836101eb389..719f00b28ff5358caf87d736ed5b4100dafdce9e 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/sfi.h>
 
 extern int pci_mrst_init(void);
-int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int sfi_mrtc_num;
+extern struct sfi_rtc_table_entry sfi_mrtc_array[];
 
 /*
 * Medfield is the follow-up to Moorestown; it combines a two-chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
 
 extern struct console early_hsu_console;
 extern void hsu_early_console_init(void);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
+/* VRTC timer */
+#define MRST_VRTC_MAP_SZ       (1024)
+/*#define MRST_VRTC_PGOFFSET   (0xc00) */
+
+extern void mrst_rtc_init(void);
+
 #endif /* _ASM_X86_MRST_H */
index 6b89f5e860214266d7270160f739e9a9be290802..4d0dfa0d998e9f80ce244d86e1fd583513aaaaca 100644 (file)
 #define MSR_AMD64_IBSCTL               0xc001103a
 #define MSR_AMD64_IBSBRTARGET          0xc001103b
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL              0xc0010200
+#define MSR_F15H_PERF_CTR              0xc0010201
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE      0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE                (1<<0)
 #define PACKAGE_THERM_INT_LOW_ENABLE           (1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE           (1 << 24)
 
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
 /* MISC_ENABLE bits: architectural */
 #define MSR_IA32_MISC_ENABLE_FAST_STRING       (1ULL << 0)
 #define MSR_IA32_MISC_ENABLE_TCC               (1ULL << 1)
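
A sketch of decoding the new threshold bits from IA32_THERM_STATUS; that the handler clears the log bit by writing it back as zero is an assumption, not something this hunk shows:

    u64 status;

    rdmsrl(MSR_IA32_THERM_STATUS, status);
    if (status & THERM_LOG_THRESHOLD0)
            /* threshold 0 crossed since the log bit was last cleared */
            wrmsrl(MSR_IA32_THERM_STATUS, status & ~THERM_LOG_THRESHOLD0);
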
index 932f0f86b4b76252e6e6434ab9c15d81c3b17004..c4021b9535102547712c92203e8afe5809e567fc 100644 (file)
@@ -5,41 +5,15 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE       0
-#define NMI_IO_APIC    1
-#define NMI_LOCAL_APIC 2
-#define NMI_INVALID    3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
                        void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-       if (nmi_watchdog == NMI_IO_APIC)
-               nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-       /*
-        * actually it should be:
-        *      return (nmi_watchdog == NMI_LOCAL_APIC ||
-        *              nmi_watchdog == NMI_IO_APIC)
-        * but since they are power of two we could use a
-        * cheaper way --cvg
-        */
-       return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
index ca0437c714b2aa3c94195a67dfe49e5eeb349b19..6761292296307163a0f5cbb4af1e27642d33da10 100644 (file)
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
 
 #define PCIBIOS_MIN_CARDBUS_IO 0x4000
 
+extern int pcibios_enabled;
 void pcibios_config_init(void);
 struct pci_bus *pcibios_scan_root(int bus);
 
index 550e26b1dbb3593f324910f0197402966ae91299..d9d4dae305f6991efa446ec0ef4e7fad8054bdc3 100644 (file)
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL   /* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET                        0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
 #endif
 
index a70cd216be5d729db1f364340f911d632819f18d..295e2ff18a6a80be6ec3425d91e73239c76f1198 100644 (file)
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- * 9-11 : reserved
- *
  * Note we have UOP and PEBS bits reserved for now
 * just in case we need them later
  */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
        P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since the P4 has quite a different performance-register
+ * architecture compared with the "architectural" ones, and we
+ * have only 64 bits to keep a performance event's configuration,
+ * the following trick is used.
+ *
+ * 1) Since only the low 32 bits of both the ESCR and CCCR
+ *    registers are meaningful, we pack them into a single 64-bit
+ *    configuration. The low 32 bits of such a config correspond
+ *    to the low 32 bits of the CCCR register, and the high 32
+ *    bits correspond to the low 32 bits of the ESCR register.
+ *
+ * 2) The meaning of every bit of such a config field can be
+ *    found in the Intel SDM, but note that we "borrow" some
+ *    reserved bits for our own use and clear them or set them
+ *    to proper values when we do a real write to the hardware
+ *    registers.
+ *
+ * 3) The config bits are laid out as follows; each field should
+ *    be either 0 or set to one of the predefined values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11:                    reserved
+ *       12:                    reserved (Enable)
+ *    13-15:                    reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25:                    reserved (FORCE_OVF)
+ *       26:                    reserved (OVF_PMI_T0)
+ *       27:                    reserved (OVF_PMI_T1)
+ *    28-29:                    reserved
+ *       30:                    reserved (Cascade)
+ *       31:                    reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0:                    reserved (T1_USR)
+ *        1:                    reserved (T1_OS)
+ *        2:                    reserved (T0_USR)
+ *        3:                    reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31:                    reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
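
A sketch of the packing these notes describe, using the p4_config_pack_escr()/p4_config_pack_cccr() helpers defined earlier in this header (escr_bits/cccr_bits are illustrative):

    /* ESCR bits go to the high word, CCCR bits to the low word. */
    u64 config = p4_config_pack_escr(escr_bits) |
                 p4_config_pack_cccr(cccr_bits);
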
 
index d6763b139a844243b9fbb8dc620e633fe7b5825a..db8aa19a08a22d35e608625b251bcee7fa5fe1ee 100644 (file)
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
 static inline void x86_mrst_early_setup(void) { }
 #endif
 
+#ifdef CONFIG_X86_INTEL_CE
+extern void x86_ce4100_early_setup(void);
+#else
+static inline void x86_ce4100_early_setup(void) { }
+#endif
+
 #ifndef _SETUP
 
 /*
index 1def60114906bf1b1dfa98ef3bd8ac177f65010f..6c22bf353f26495b1fa71dc5a92cdaa05e5b1d8e 100644 (file)
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
 #endif
 }
index 2b16a2ad23dc6b9647028c0808f8f45b094e74ac..52b5c7ed3608d9fc439c5ca69bf58ca1b8ebef88 100644 (file)
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+               unsigned long *stack,
                const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       unsigned long bp;
+
+       if (regs)
+               return regs->bp;
+
+       if (task == current) {
+               /* Grab bp right from our regs */
+               get_bp(bp);
+               return bp;
+       }
+
+       /* bp is the last reg pushed by switch_to */
+       return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl);
+                  unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *sp, unsigned long bp, char *log_lvl);
+                  unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
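
The caller-visible effect of dropping the bp parameter, as a sketch (backtrace_ops is an illustrative name for a struct stacktrace_ops):

    /* Before: callers fetched bp themselves and passed it through. */
    dump_trace(current, NULL, NULL, 0, &backtrace_ops, NULL);

    /* After: the unwinder derives bp itself via stack_frame(). */
    dump_trace(current, NULL, NULL, &backtrace_ops, NULL);
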
index 5469630b27f56d732b10036ba381df39f28ca52b..fa7b9176b76cb33820034403fd8f4a50dc49709c 100644 (file)
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
index 42d412fd8b02cdd369b5cc8db5a67aa5cc7a0770..ce1d54c8a433a6b866977beeb9c6681e0db64746 100644 (file)
  * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
  * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
  *
- * We will use 31 sets, one for sending BAU messages from each of the 32
+ * We will use one set for sending BAU messages from each of the
 * cpus on the uvhub.
  *
  * TLB shootdown will use the first of the 8 descriptors of each set.
  * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  */
 
+#define MAX_CPUS_PER_UVHUB             64
+#define MAX_CPUS_PER_SOCKET            32
+#define UV_ADP_SIZE                    64 /* hardware-provided max. */
+#define UV_CPUS_PER_ACT_STATUS         32 /* hardware-provided max. */
 #define UV_ITEMS_PER_DESCRIPTOR                8
 /* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT             3
-#define UV_CPUS_PER_ACT_STATUS         32
 #define UV_ACT_STATUS_MASK             0x3
 #define UV_ACT_STATUS_SIZE             2
-#define UV_ADP_SIZE                    32
 #define UV_DISTRIBUTION_SIZE           256
 #define UV_SW_ACK_NPENDING             8
 #define UV_NET_ENDPOINT_INTD           0x38
  * number of destination side software ack resources
  */
 #define DEST_NUM_RESOURCES             8
-#define MAX_CPUS_PER_NODE              32
 /*
  * completion statuses for sending a TLB flush message
  */
index 1e994754d323f400b85c5d1d7350c6e52a702ff9..34244b2cd880cff373e744ec34193adb8287ab2a 100644 (file)
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT)     += doublefault_32.o
 obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_VM86)             += vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
-obj-$(CONFIG_EARLY_PRINTK_MRST)        += early_printk_mrst.o
 
 obj-$(CONFIG_HPET_TIMER)       += hpet.o
 obj-$(CONFIG_APB_TIMER)                += apb_timer.o
index 71232b941b6c9c6409fd14e9479f3d1625eeaeb0..17c8090fabd4703d324240328e2a15a55299bb70 100644 (file)
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 {
        unsigned int ver = 0;
 
+       if (id >= (MAX_LOCAL_APIC-1)) {
+               printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+               return;
+       }
+
        if (!enabled) {
                ++disabled_cpus;
                return;
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
        acpi_register_lapic_address(acpi_lapic_addr);
 
        count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
-                                     acpi_parse_sapic, MAX_APICS);
+                                     acpi_parse_sapic, MAX_LOCAL_APIC);
 
        if (!count) {
                x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-                                               acpi_parse_x2apic, MAX_APICS);
+                                       acpi_parse_x2apic, MAX_LOCAL_APIC);
                count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-                                             acpi_parse_lapic, MAX_APICS);
+                                       acpi_parse_lapic, MAX_LOCAL_APIC);
        }
        if (!count && !x2count) {
                printk(KERN_ERR PREFIX "No LAPIC entries present\n");
index 5079f24c955a2d3b9b66532cd2c8cd45e5116d14..123608531c8f933b819a3fc7c135748137c8eb5b 100644 (file)
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
        mutex_unlock(&smp_alt);
 }
 
+bool skip_smp_alternatives;
 void alternatives_smp_switch(int smp)
 {
        struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
        printk("lockdep: fixing up alternatives.\n");
 #endif
 
-       if (noreplace_smp || smp_alt_once)
+       if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));
 
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-       void *addr;
-       const void *opcode;
-       size_t len;
+       struct text_poke_param *params;
+       int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
        struct text_poke_params *tpp = data;
+       struct text_poke_param *p;
+       int i;
 
        if (atomic_dec_and_test(&stop_machine_first)) {
-               text_poke(tpp->addr, tpp->opcode, tpp->len);
+               for (i = 0; i < tpp->nparams; i++) {
+                       p = &tpp->params[i];
+                       text_poke(p->addr, p->opcode, p->len);
+               }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
                smp_mb();       /* Load wrote_text before following execution */
        }
 
-       flush_icache_range((unsigned long)tpp->addr,
-                          (unsigned long)tpp->addr + tpp->len);
+       for (i = 0; i < tpp->nparams; i++) {
+               p = &tpp->params[i];
+               flush_icache_range((unsigned long)p->addr,
+                                  (unsigned long)p->addr + p->len);
+       }
+
        return 0;
 }
 
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
        struct text_poke_params tpp;
+       struct text_poke_param p;
 
-       tpp.addr = addr;
-       tpp.opcode = opcode;
-       tpp.len = len;
+       p.addr = addr;
+       p.opcode = opcode;
+       p.len = len;
+       tpp.params = &p;
+       tpp.nparams = 1;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
        return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instructions by using stop_machine() on SMP. Since
+ * stop_machine() is a heavy operation, it is better to aggregate text_poke
+ * requests and perform them in a single call where possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+       struct text_poke_params tpp = {.params = params, .nparams = n};
+
+       atomic_set(&stop_machine_first, 1);
+       wrote_text = 0;
+       stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
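
A sketch of a batched caller of text_poke_smp_batch() (the kprobes jump-optimization code is the intended user; the addresses, opcode buffers, and lengths here are illustrative):

    struct text_poke_param params[2] = {
            { .addr = addr1, .opcode = insn1, .len = len1 },
            { .addr = addr2, .opcode = insn2, .len = len2 },
    };

    get_online_cpus();
    mutex_lock(&text_mutex);
    text_poke_smp_batch(params, ARRAY_SIZE(params));
    mutex_unlock(&text_mutex);
    put_online_cpus();
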
index 8f6463d8ed0de1ebfece6cb1138a15697f657197..affacb5e0065a1392713da260ecf8abfbab2c405 100644 (file)
 
 static u32 *flush_words;
 
-struct pci_device_id k8_nb_ids[] = {
+struct pci_device_id amd_nb_misc_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
        {}
 };
-EXPORT_SYMBOL(k8_nb_ids);
+EXPORT_SYMBOL(amd_nb_misc_ids);
 
-struct k8_northbridge_info k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
+struct amd_northbridge_info amd_northbridges;
+EXPORT_SYMBOL(amd_northbridges);
 
-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+static struct pci_dev *next_northbridge(struct pci_dev *dev,
+                                       struct pci_device_id *ids)
 {
        do {
                dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
                if (!dev)
                        break;
-       } while (!pci_match_id(&k8_nb_ids[0], dev));
+       } while (!pci_match_id(ids, dev));
        return dev;
 }
 
-int cache_k8_northbridges(void)
+int amd_cache_northbridges(void)
 {
-       int i;
-       struct pci_dev *dev;
+       int i = 0;
+       struct amd_northbridge *nb;
+       struct pci_dev *misc;
 
-       if (k8_northbridges.num)
+       if (amd_nb_num())
                return 0;
 
-       dev = NULL;
-       while ((dev = next_k8_northbridge(dev)) != NULL)
-               k8_northbridges.num++;
+       misc = NULL;
+       while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+               i++;
 
-       /* some CPU families (e.g. family 0x11) do not support GART */
-       if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
-           boot_cpu_data.x86 == 0x15)
-               k8_northbridges.gart_supported = 1;
+       if (i == 0)
+               return 0;
 
-       k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
-                                         sizeof(void *), GFP_KERNEL);
-       if (!k8_northbridges.nb_misc)
+       nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+       if (!nb)
                return -ENOMEM;
 
-       if (!k8_northbridges.num) {
-               k8_northbridges.nb_misc[0] = NULL;
-               return 0;
-       }
+       amd_northbridges.nb = nb;
+       amd_northbridges.num = i;
 
-       if (k8_northbridges.gart_supported) {
-               flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
-                                     GFP_KERNEL);
-               if (!flush_words) {
-                       kfree(k8_northbridges.nb_misc);
-                       return -ENOMEM;
-               }
-       }
+       misc = NULL;
+       for (i = 0; i != amd_nb_num(); i++) {
+               node_to_amd_nb(i)->misc = misc =
+                       next_northbridge(misc, amd_nb_misc_ids);
+       }
+
+       /* some CPU families (e.g. family 0x11) do not support GART */
+       if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+           boot_cpu_data.x86 == 0x15)
+               amd_northbridges.flags |= AMD_NB_GART;
+
+       /*
+        * Some CPU families support L3 Cache Index Disable. There are some
+        * limitations because of errata E382 and E388 on family 0x10.
+        */
+       if (boot_cpu_data.x86 == 0x10 &&
+           boot_cpu_data.x86_model >= 0x8 &&
+           (boot_cpu_data.x86_model > 0x9 ||
+            boot_cpu_data.x86_mask >= 0x1))
+               amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
-       dev = NULL;
-       i = 0;
-       while ((dev = next_k8_northbridge(dev)) != NULL) {
-               k8_northbridges.nb_misc[i] = dev;
-               if (k8_northbridges.gart_supported)
-                       pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
-       }
-       k8_northbridges.nb_misc[i] = NULL;
        return 0;
 }
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+EXPORT_SYMBOL_GPL(amd_cache_northbridges);
 
 /* Ignores subdevice/subvendor but as far as I can figure out
    they're useless anyways */
-int __init early_is_k8_nb(u32 device)
+int __init early_is_amd_nb(u32 device)
 {
        struct pci_device_id *id;
        u32 vendor = device & 0xffff;
        device >>= 16;
-       for (id = k8_nb_ids; id->vendor; id++)
+       for (id = amd_nb_misc_ids; id->vendor; id++)
                if (vendor == id->vendor && device == id->device)
                        return 1;
        return 0;
 }
 
-void k8_flush_garts(void)
+int amd_cache_gart(void)
+{
+       int i;
+
+       if (!amd_nb_has_feature(AMD_NB_GART))
+               return 0;
+
+       flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+       if (!flush_words) {
+               amd_northbridges.flags &= ~AMD_NB_GART;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i != amd_nb_num(); i++)
+               pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+                                     &flush_words[i]);
+
+       return 0;
+}
+
+void amd_flush_garts(void)
 {
        int flushed, i;
        unsigned long flags;
        static DEFINE_SPINLOCK(gart_lock);
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
        /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
           that it doesn't matter to serialize more. -AK */
        spin_lock_irqsave(&gart_lock, flags);
        flushed = 0;
-       for (i = 0; i < k8_northbridges.num; i++) {
-               pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
-                                      flush_words[i]|1);
+       for (i = 0; i < amd_nb_num(); i++) {
+               pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+                                      flush_words[i] | 1);
                flushed++;
        }
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                u32 w;
                /* Make sure the hardware actually executed the flush*/
                for (;;) {
-                       pci_read_config_dword(k8_northbridges.nb_misc[i],
+                       pci_read_config_dword(node_to_amd_nb(i)->misc,
                                              0x9c, &w);
                        if (!(w & 1))
                                break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
        if (!flushed)
                printk("nothing to flush?\n");
 }
-EXPORT_SYMBOL_GPL(k8_flush_garts);
+EXPORT_SYMBOL_GPL(amd_flush_garts);
 
-static __init int init_k8_nbs(void)
+static __init int init_amd_nbs(void)
 {
        int err = 0;
 
-       err = cache_k8_northbridges();
+       err = amd_cache_northbridges();
 
        if (err < 0)
-               printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
+               printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+
+       if (amd_cache_gart() < 0)
+               printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
+                      "GART support disabled.\n");
 
        return err;
 }
 
 /* This has to go after the PCI subsystem */
-fs_initcall(init_k8_nbs);
+fs_initcall(init_amd_nbs);
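
Taken together, the renamed interface is used roughly like this (the accessors amd_nb_num(), node_to_amd_nb(), and amd_nb_has_feature() come from <asm/amd_nb.h>, which this commit also touches):

    int i;

    if (amd_cache_northbridges() < 0)
            return;         /* no northbridges found */

    for (i = 0; i < amd_nb_num(); i++) {
            struct pci_dev *misc = node_to_amd_nb(i)->misc;
            /* program this node's miscellaneous-control function */
    }

    if (amd_nb_has_feature(AMD_NB_GART))
            amd_flush_garts();
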
index 92543c73cf8ed8d085dc581fe8171b3bbb6f939e..7c9ab59653e8bc5e229ba9e96734d20d4db50db5 100644 (file)
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 
        if (system_state == SYSTEM_BOOTING) {
                irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+               irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
                /* APB timer irqs are set up as mp_irqs, timer is edge type */
                __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
                if (request_irq(adev->irq, apbt_interrupt_handler,
index b3a16e8f0703d47f50a354223bfe8c6e9382126e..dcd7c83e1659212ea5bab9d1ba0b1f0d8e4942c5 100644 (file)
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 * Do a PCI bus scan by hand because we're running before the PCI
  * subsystem.
  *
- * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
  * generically. It's probably overkill to always scan all slots because
  * the AGP bridges should be always an own bus on the HT hierarchy,
  * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
                dev_base = bus_dev_ranges[i].dev_base;
                dev_limit = bus_dev_ranges[i].dev_limit;
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
index 910f20b457c464d34f1e9874269d652ea0da325e..3966b564ea478746bc77d66886249a294177b21a 100644 (file)
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)   += nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)      += hw_nmi.o
+obj-y                          += hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_SMP)              += ipi.o
index 78218135b48e6169d155fb4a097e5b6c8e30e53a..879999a5230fc613a0815cd056b4820ebb5cf95a 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
        reserved = reserve_eilvt_offset(offset, new);
 
        if (reserved != new) {
-               pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
-                      "vector 0x%x was already reserved by another core, "
-                      "APIC%lX=0x%x\n",
-                      smp_processor_id(), new, reserved, reg, old);
+               pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+                      "vector 0x%x, but the register is already in use for "
+                      "vector 0x%x on another cpu\n",
+                      smp_processor_id(), reg, offset, new, reserved);
                return -EINVAL;
        }
 
        if (!eilvt_entry_is_changeable(old, new)) {
-               pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
-                      "register already in use, APIC%lX=0x%x\n",
-                      smp_processor_id(), new, reg, old);
+               pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+                      "vector 0x%x, but the register is already in use for "
+                      "vector 0x%x on this cpu\n",
+                      smp_processor_id(), reg, offset, new, old);
                return -EBUSY;
        }
 
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
         * PIT/HPET going.  Otherwise register lapic as a dummy
         * device.
         */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               pr_warning("APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
+       lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
        /* Setup the lapic or request the broadcast */
        setup_APIC_timer();
@@ -1387,7 +1383,6 @@ void __cpuinit end_local_APIC_setup(void)
        }
 #endif
 
-       setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
 
        /*
@@ -1538,13 +1533,60 @@ static int __init detect_init_APIC(void)
        return 0;
 }
 #else
+
+static int apic_verify(void)
+{
+       u32 features, h, l;
+
+       /*
+        * The APIC feature bit should now be enabled
+        * in `cpuid'
+        */
+       features = cpuid_edx(1);
+       if (!(features & (1 << X86_FEATURE_APIC))) {
+               pr_warning("Could not enable APIC!\n");
+               return -1;
+       }
+       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /* The BIOS may have set up the APIC at some other address */
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       if (l & MSR_IA32_APICBASE_ENABLE)
+               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+       pr_info("Found and enabled local APIC!\n");
+       return 0;
+}
+
+int apic_force_enable(void)
+{
+       u32 h, l;
+
+       if (disable_apic)
+               return -1;
+
+       /*
+        * Some BIOSes disable the local APIC in the APIC_BASE
+        * MSR. This can only be done in software for Intel P6 or later
+        * and AMD K7 (Model > 1) or later.
+        */
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+               pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+               enabled_via_apicbase = 1;
+       }
+       return apic_verify();
+}
+
 /*
  * Detect and initialize APIC
  */
 static int __init detect_init_APIC(void)
 {
-       u32 h, l, features;
-
        /* Disabled by kernel option? */
        if (disable_apic)
                return -1;
@@ -1574,38 +1616,12 @@ static int __init detect_init_APIC(void)
                                "you can enable it with \"lapic\"\n");
                        return -1;
                }
-               /*
-                * Some BIOSes disable the local APIC in the APIC_BASE
-                * MSR. This can only be done in software for Intel P6 or later
-                * and AMD K7 (Model > 1) or later.
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-                       pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-                       l &= ~MSR_IA32_APICBASE_BASE;
-                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-                       wrmsr(MSR_IA32_APICBASE, l, h);
-                       enabled_via_apicbase = 1;
-               }
-       }
-       /*
-        * The APIC feature bit should now be enabled
-        * in `cpuid'
-        */
-       features = cpuid_edx(1);
-       if (!(features & (1 << X86_FEATURE_APIC))) {
-               pr_warning("Could not enable APIC!\n");
-               return -1;
+               if (apic_force_enable())
+                       return -1;
+       } else {
+               if (apic_verify())
+                       return -1;
        }
-       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-       /* The BIOS may have set up the APIC at some other address */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       if (l & MSR_IA32_APICBASE_ENABLE)
-               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-       pr_info("Found and enabled local APIC!\n");
 
        apic_pm_activate();
 
@@ -1693,7 +1709,7 @@ void __init init_apic_mappings(void)
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
 
 int __init APIC_init_uniprocessor(void)
 {
@@ -1758,17 +1774,10 @@ int __init APIC_init_uniprocessor(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
-#else
-       localise_nmi_watchdog();
 #endif
 
        x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-       check_nmi_watchdog();
-#endif
-
        return 0;
 }
 
index 62f6e1e55b90d7f9a2bc460e73ba8895da23c4ff..72ec29e1ae0605990dc7f3b9fc1511bf5cab3cbe 100644 (file)
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
        return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
 void arch_trigger_all_cpu_backtrace(void)
 {
        int i;
 
+       if (test_and_set_bit(0, &backtrace_flag))
+               /*
+                * If there is already a trigger_all_cpu_backtrace() in progress
+                * (backtrace_flag == 1), don't print duplicate CPU dump info.
+                */
+               return;
+
        cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 
        printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
                        break;
                mdelay(1);
        }
+
+       clear_bit(0, &backtrace_flag);
+       smp_mb__after_clear_bit();
 }
 
 static int __kprobes
@@ -50,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 {
        struct die_args *args = __args;
        struct pt_regs *regs;
-       int cpu = smp_processor_id();
+       int cpu;
 
        switch (cmd) {
        case DIE_NMI:
@@ -62,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
        }
 
        regs = args->regs;
+       cpu = smp_processor_id();
 
        if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -91,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
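
For context, this NMI handler is normally reached through the generic wrapper in <linux/nmi.h>; a sketch of a typical trigger site (sysrq-l style):

    /* Send an NMI to every online CPU and print each one's stack. */
    trigger_all_cpu_backtrace();
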
index fadcd743a74f8bdcd5effbaf7e28b01ea3003532..f6cd5b41077034405045fec84fcde39a4b0b3212 100644 (file)
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -1934,8 +1933,7 @@ void disable_IO_APIC(void)
  *
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
        union IO_APIC_reg_00 reg_00;
        physid_mask_t phys_id_present_map;
@@ -1944,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void)
        unsigned char old_id;
        unsigned long flags;
 
-       if (acpi_ioapic)
-               return;
-       /*
-        * Don't check I/O APIC IDs for xAPIC systems.  They have
-        * no meaning without the serial APIC bus.
-        */
-       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-               return;
        /*
         * This is broken; anything with a real cpu count has to
         * circumvent this idiocy regardless.
@@ -2006,7 +1995,6 @@ void __init setup_ioapic_ids_from_mpc(void)
                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
                }
 
-
                /*
                 * We need to adjust the IRQ routing table
                 * if the ID changed.
@@ -2042,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void)
                        apic_printk(APIC_VERBOSE, " ok.\n");
        }
 }
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+       if (acpi_ioapic)
+               return;
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       setup_ioapic_ids_from_mpc_nocheck();
+}
 #endif
 
 int no_timer_check __initdata;
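
A sketch of what the split enables: a caller that must rewrite IO-APIC IDs even when the ACPI/xAPIC checks would skip it (the motivating user is not visible in this commit) can call the _nocheck variant directly:

    /* Bypass the acpi_ioapic and xAPIC version checks. */
    setup_ioapic_ids_from_mpc_nocheck();
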
@@ -2642,24 +2645,6 @@ static void lapic_register_intr(int irq)
                                      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-       /*
-        * Dirty trick to enable the NMI watchdog ...
-        * We put the 8259A master into AEOI mode and
-        * unmask on all local APICs LVT0 as NMI.
-        *
-        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-        * is from Maciej W. Rozycki - so we do not have to EOI from
-        * the NMI handler or the timer interrupt.
-        */
-       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-       enable_NMI_through_LVT0();
-
-       apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2765,15 +2750,6 @@ static inline void __init check_timer(void)
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-       {
-               unsigned int ver;
-
-               ver = apic_read(APIC_LVR);
-               ver = GET_APIC_VERSION(ver);
-               timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-       }
-#endif
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2797,6 @@ static inline void __init check_timer(void)
                                unmask_ioapic(cfg);
                }
                if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        if (disable_timer_pin_1 > 0)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
@@ -2850,11 +2822,6 @@ static inline void __init check_timer(void)
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
                        timer_through_8259 = 1;
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               legacy_pic->mask(0);
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        goto out;
                }
                /*
@@ -2866,15 +2833,6 @@ static inline void __init check_timer(void)
                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
        }
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-                           "through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = NMI_NONE;
-       }
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
 
@@ -3639,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic)
        return reg_01.bits.entries + 1;
 }
 
-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
 {
        int nr;
 
@@ -3956,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
        return res;
 }
 
-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
 {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
        struct resource *ioapic_res;
@@ -3994,6 +3952,8 @@ fake_ioapic_page:
                ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
                ioapic_res++;
        }
+
+       probe_nr_irqs_gsi();
 }
 
 void __init ioapic_insert_resources(void)
@@ -4103,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void)
 
        printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
-       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+       physid_set_mask_of_physid(boot_cpu_physical_apicid,
+                                        &phys_cpu_present_map);
 #endif
        /* Make sure the irq descriptor is set up */
        cfg = alloc_irq_and_cfg_at(0, 0);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644 (file)
index c90041c..0000000
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);          /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-       return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-       return atomic_read(&mce_entry) > 0;
-#endif
-       return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-       return per_cpu(irq_stat, cpu).apic_timer_irqs +
-               per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-       local_irq_enable_in_hardirq();
-       /*
-        * Intentionally don't use cpu_relax here. This is
-        * to make sure that the performance counter really ticks,
-        * even if there is a simulator or similar that catches the
-        * pause instruction. On a real HT machine this is fine because
-        * all other CPUs are busy with "useless" delay loops and don't
-        * care if they get somewhat less cycles.
-        */
-       while (endflag == 0)
-               mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-       printk(KERN_CONT "\n");
-
-       printk(KERN_WARNING
-               "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-                       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-       printk(KERN_WARNING
-               "Please report this to bugzilla.kernel.org,\n");
-       printk(KERN_WARNING
-               "and attach the output of the 'dmesg' command.\n");
-
-       per_cpu(wd_enabled, cpu) = 0;
-       atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-       unsigned int *prev_nmi_count;
-       int cpu;
-
-       if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-               return 0;
-
-       prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-       if (!prev_nmi_count)
-               goto error;
-
-       printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-       for_each_possible_cpu(cpu)
-               prev_nmi_count[cpu] = get_nmi_count(cpu);
-       local_irq_enable();
-       mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-       for_each_online_cpu(cpu) {
-               if (!per_cpu(wd_enabled, cpu))
-                       continue;
-               if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-                       report_broken_nmi(cpu, prev_nmi_count);
-       }
-       endflag = 1;
-       if (!atomic_read(&nmi_active)) {
-               kfree(prev_nmi_count);
-               atomic_set(&nmi_active, -1);
-               goto error;
-       }
-       printk("OK.\n");
-
-       /*
-        * now that we know it works we can reduce NMI frequency to
-        * something more reasonable; makes a difference in some configs
-        */
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               nmi_hz = lapic_adjust_nmi_hz(1);
-
-       kfree(prev_nmi_count);
-       return 0;
-error:
-       if (nmi_watchdog == NMI_IO_APIC) {
-               if (!timer_through_8259)
-                       legacy_pic->mask(0);
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-       }
-
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-       return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-       unsigned int nmi;
-
-       if (!strncmp(str, "panic", 5)) {
-               panic_on_timeout = 1;
-               str = strchr(str, ',');
-               if (!str)
-                       return 1;
-               ++str;
-       }
-
-       if (!strncmp(str, "lapic", 5))
-               nmi_watchdog = NMI_LOCAL_APIC;
-       else if (!strncmp(str, "ioapic", 6))
-               nmi_watchdog = NMI_IO_APIC;
-       else {
-               get_option(&str, &nmi);
-               if (nmi >= NMI_INVALID)
-                       return 0;
-               nmi_watchdog = nmi;
-       }
-
-       return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       nmi_pm_active = atomic_read(&nmi_active);
-       stop_apic_nmi_watchdog(NULL);
-       BUG_ON(atomic_read(&nmi_active) != 0);
-       return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       if (nmi_pm_active > 0) {
-               setup_apic_nmi_watchdog(NULL);
-               touch_nmi_watchdog();
-       }
-       return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-       .name           = "lapic_nmi",
-       .resume         = lapic_nmi_resume,
-       .suspend        = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-       .id     = 0,
-       .cls    = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-       int error;
-
-       /*
-        * should really be a BUG_ON but b/c this is an
-        * init call, it just doesn't work.  -dcz
-        */
-       if (nmi_watchdog != NMI_LOCAL_APIC)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0)
-               return 0;
-
-       error = sysdev_class_register(&nmi_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic_nmi);
-       return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-       __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-       if (__get_cpu_var(wd_enabled))
-               return;
-
-       /* cheap hack to support suspend/resume */
-       /* if cpu0 is not active neither should the other cpus */
-       if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-               return;
-
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               if (lapic_watchdog_init(nmi_hz) < 0) {
-                       __get_cpu_var(wd_enabled) = 0;
-                       return;
-               }
-               /* FALL THROUGH */
-       case NMI_IO_APIC:
-               __get_cpu_var(wd_enabled) = 1;
-               atomic_inc(&nmi_active);
-       }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-       /* only support LOCAL and IO APICs for now */
-       if (!nmi_watchdog_active())
-               return;
-       if (__get_cpu_var(wd_enabled) == 0)
-               return;
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               lapic_watchdog_stop();
-       else
-               __acpi_nmi_disable(NULL);
-       __get_cpu_var(wd_enabled) = 0;
-       atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-       if (nmi_watchdog_active()) {
-               unsigned cpu;
-
-               /*
-                * Tell other CPUs to reset their alert counters. We cannot
-                * do it ourselves because the alert count increase is not
-                * atomic.
-                */
-               for_each_present_cpu(cpu) {
-                       if (per_cpu(nmi_touch, cpu) != 1)
-                               per_cpu(nmi_touch, cpu) = 1;
-               }
-       }
-
-       /*
-        * Tickle the softlockup detector too:
-        */
-       touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-       /*
-        * Since current_thread_info() is always on the stack, and we
-        * always switch the stack NMI-atomically, it's safe to use
-        * smp_processor_id().
-        */
-       unsigned int sum;
-       int touched = 0;
-       int cpu = smp_processor_id();
-       int rc = 0;
-
-       sum = get_timer_irqs(cpu);
-
-       if (__get_cpu_var(nmi_touch)) {
-               __get_cpu_var(nmi_touch) = 0;
-               touched = 1;
-       }
-
-       /* We can be called before check_nmi_watchdog, hence NULL check. */
-       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-               static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
-               raw_spin_lock(&lock);
-               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-               show_regs(regs);
-               dump_stack();
-               raw_spin_unlock(&lock);
-               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
-               rc = 1;
-       }
-
-       /* Could check oops_in_progress here too, but it's safer not to */
-       if (mce_in_progress())
-               touched = 1;
-
-       /* if none of the timers is firing, this cpu isn't doing much */
-       if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-               /*
-                * Ayiee, looks like this CPU is stuck ...
-                * wait a few IRQs (5 seconds) before doing the oops ...
-                */
-               __this_cpu_inc(alert_counter);
-               if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
-                       /*
-                        * die_nmi will return ONLY if NOTIFY_STOP happens..
-                        */
-                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
-                               regs, panic_on_timeout);
-       } else {
-               __get_cpu_var(last_irq_sum) = sum;
-               __this_cpu_write(alert_counter, 0);
-       }
-
-       /* see if the nmi watchdog went off */
-       if (!__get_cpu_var(wd_enabled))
-               return rc;
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               rc |= lapic_wd_event(nmi_hz);
-               break;
-       case NMI_IO_APIC:
-               /*
-                * don't know how to accurately check for this.
-                * just assume it was a watchdog timer interrupt
-                * This matches the old behaviour.
-                */
-               rc = 1;
-               break;
-       }
-       return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-       __get_cpu_var(wd_enabled) = 1;
-       atomic_inc(&nmi_active);
-       __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-       unknown_nmi_panic = 1;
-       return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-       unsigned char reason = get_nmi_reason();
-       char buf[64];
-
-       sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-       die_nmi(buf, regs, 1); /* Always panic here */
-       return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-                       void __user *buffer, size_t *length, loff_t *ppos)
-{
-       int old_state;
-
-       nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-       old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (!!old_state == !!nmi_watchdog_enabled)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-               printk(KERN_WARNING
-                       "NMI watchdog is permanently disabled\n");
-               return -EIO;
-       }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_lapic_nmi_watchdog();
-               else
-                       disable_lapic_nmi_watchdog();
-       } else if (nmi_watchdog == NMI_IO_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_ioapic_nmi_watchdog();
-               else
-                       disable_ioapic_nmi_watchdog();
-       } else {
-               printk(KERN_WARNING
-                       "NMI watchdog doesn't know what hardware to touch\n");
-               return -EIO;
-       }
-       return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-       if (unknown_nmi_panic)
-               return unknown_nmi_panic_callback(regs, cpu);
-#endif
-       return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
-       int i;
-
-       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
-       printk(KERN_INFO "sending NMI to all CPUs:\n");
-       apic->send_IPI_all(NMI_VECTOR);
-
-       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
-       for (i = 0; i < 10 * 1000; i++) {
-               if (cpumask_empty(to_cpumask(backtrace_mask)))
-                       break;
-               mdelay(1);
-       }
-}
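/*
 * Illustrative sketch, not part of the patch: the lockup heuristic that the
 * removed nmi_watchdog_tick() above implemented, in isolation. Each NMI tick
 * compares the CPU's timer-IRQ count with the value seen on the previous
 * tick; if it hasn't moved for 5 seconds' worth of ticks, the CPU is presumed
 * stuck. timer_irqs() and report_lockup() are hypothetical stand-ins for
 * get_timer_irqs() and die_nmi().
 */
unsigned int timer_irqs(void);          /* hypothetical per-CPU timer IRQ count */
void report_lockup(void);               /* hypothetical, e.g. die_nmi() */

static unsigned int last_sum, alert;

static void watchdog_tick(unsigned int nmi_hz)
{
        unsigned int sum = timer_irqs();

        if (sum == last_sum) {
                /* no timer IRQs since the last NMI: possibly stuck */
                if (++alert == 5 * nmi_hz)
                        report_lockup();
        } else {
                /* the CPU made progress: restart the countdown */
                last_sum = sum;
                alert = 0;
        }
}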
index c1c52c341f40a607be5b252746db31f36b8dc561..2a3f2a7db243f8b846ef5d3032287037d0111a69 100644 (file)
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
 
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+       unsigned long val, *mmr;
+
+       mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+       val = *mmr;
+       early_iounmap(mmr, sizeof(*mmr));
+       return val;
+}
+
 static inline bool is_GRU_range(u64 start, u64 end)
 {
        return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
        return is_ISA_range(start, end) || is_GRU_range(start, end);
 }
 
-static int early_get_nodeid(void)
+static int __init early_get_pnodeid(void)
 {
        union uvh_node_id_u node_id;
-       unsigned long *mmr;
-
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
-       node_id.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       union uvh_rh_gam_config_mmr_u  m_n_config;
+       int pnode;
 
        /* Currently, all blades have same revision number */
+       node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+       m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
        uv_min_hub_revision_id = node_id.s.revision;
 
-       return node_id.s.node_id;
+       pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+       return pnode;
 }
 
 static void __init early_get_apic_pnode_shift(void)
 {
-       unsigned long *mmr;
-
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
-       uvh_apicid.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
        if (!uvh_apicid.v)
                /*
                 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
 static void __init uv_set_apicid_hibit(void)
 {
        union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
-       unsigned long *mmr;
 
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE |
-               UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
-       apicid_mask.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
        uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
 }
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-       int nodeid;
+       int pnodeid;
 
        if (!strcmp(oem_id, "SGI")) {
-               nodeid = early_get_nodeid();
+               pnodeid = early_get_pnodeid();
                early_get_apic_pnode_shift();
                x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
                x86_platform.nmi_init = uv_nmi_init;
@@ -119,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
                        uv_system_type = UV_X2APIC;
                else if (!strcmp(oem_table_id, "UVH")) {
                        __get_cpu_var(x2apic_extra_bits) =
-                               nodeid << (uvh_apicid.s.pnode_shift - 1);
+                               pnodeid << uvh_apicid.s.pnode_shift;
                        uv_system_type = UV_NON_UNIQUE_APIC;
                        uv_set_apicid_hibit();
                        return 1;
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
 void __init uv_system_init(void)
 {
        union uvh_rh_gam_config_mmr_u  m_n_config;
+       union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
        union uvh_node_id_u node_id;
        unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
        int gnode_extra, max_pnode = 0;
        unsigned long mmr_base, present, paddr;
-       unsigned short pnode_mask;
+       unsigned short pnode_mask, pnode_io_mask;
 
        map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
        n_val = m_n_config.s.n_skt;
+       mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+       n_io = mmioh.s.n_io;
        mmr_base =
            uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
            ~UV_MMR_ENABLE;
        pnode_mask = (1 << n_val) - 1;
+       pnode_io_mask = (1 << n_io) - 1;
+
        node_id.v = uv_read_local_mmr(UVH_NODE_ID);
        gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
        gnode_upper = ((unsigned long)gnode_extra  << m_val);
-       printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
-                       n_val, m_val, gnode_upper, gnode_extra);
+       printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+                       n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
 
        printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
                for (j = 0; j < 64; j++) {
                        if (!test_bit(j, &present))
                                continue;
-                       pnode = (i * 64 + j);
+                       pnode = (i * 64 + j) & pnode_mask;
                        uv_blade_info[blade].pnode = pnode;
                        uv_blade_info[blade].nr_possible_cpus = 0;
                        uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
                /*
                 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
                 */
+               uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
                uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
                pnode = uv_apicid_to_pnode(apicid);
                blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
                uv_cpu_hub_info(cpu)->numa_blade_id = blade;
                uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
                uv_cpu_hub_info(cpu)->pnode = pnode;
-               uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
                uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
                uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
                uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
 
        map_gru_high(max_pnode);
        map_mmr_high(max_pnode);
-       map_mmioh_high(max_pnode);
+       map_mmioh_high(max_pnode & pnode_io_mask);
 
        uv_cpu_init();
        uv_scir_register_cpu_notifier();
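/*
 * Worked example, not part of the patch, of the pnode extraction done in
 * early_get_pnodeid() above, with made-up register values: the node id is
 * shifted right by one and masked down to n_skt bits.
 *
 *      node_id.s.node_id   = 0x2a (binary 101010)
 *      m_n_config.s.n_skt  = 4
 *      pnode = (0x2a >> 1) & ((1 << 4) - 1) = 0x15 & 0xf = 0x5
 */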
index 4b68bda30938d0a55ed39eeaeff68157266a9ea0..1d59834396bdc145c630e671d1bccd7769689a88 100644 (file)
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
 #else
        vgetcpu_set_mode();
 #endif
-       init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
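/*
 * Note, not part of the patch: the explicit call removed above is replaced by
 * letting the initcall machinery run the initializer itself, as done at the
 * bottom of the perf_event.c hunk later in this series:
 *
 *      int __init init_hw_perf_events(void) { ... return 0; }
 *      early_initcall(init_hw_perf_events);
 *
 * early_initcall() expects an int-returning function, which is why the
 * prototype changes from void in that hunk.
 */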
index 17ad0336621135a2310f3e9a2e5d9bf3d2f668ba..9ecf81f9b90fb0c73416d958b1aa216b17e1ecfa 100644 (file)
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
 };
 
 struct amd_l3_cache {
-       struct   pci_dev *dev;
-       bool     can_disable;
+       struct   amd_northbridge *nb;
        unsigned indices;
        u8       subcaches[4];
 };
@@ -311,14 +310,12 @@ struct _cache_attr {
 /*
  * L3 cache descriptors
  */
-static struct amd_l3_cache **__cpuinitdata l3_caches;
-
 static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 {
        unsigned int sc0, sc1, sc2, sc3;
        u32 val = 0;
 
-       pci_read_config_dword(l3->dev, 0x1C4, &val);
+       pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
 
        /* calculate subcache sizes */
        l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
-{
-       struct amd_l3_cache *l3;
-       struct pci_dev *dev = node_to_k8_nb_misc(node);
-
-       l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
-       if (!l3) {
-               printk(KERN_WARNING "Error allocating L3 struct\n");
-               return NULL;
-       }
-
-       l3->dev = dev;
-
-       amd_calc_l3_indices(l3);
-
-       return l3;
-}
-
-static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
-                                          int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
+                                       int index)
 {
+       static struct amd_l3_cache *__cpuinitdata l3_caches;
        int node;
 
-       if (boot_cpu_data.x86 != 0x10)
-               return;
-
-       if (index < 3)
-               return;
-
-       /* see errata #382 and #388 */
-       if (boot_cpu_data.x86_model < 0x8)
-               return;
-
-       if ((boot_cpu_data.x86_model == 0x8 ||
-            boot_cpu_data.x86_model == 0x9)
-               &&
-            boot_cpu_data.x86_mask < 0x1)
-                       return;
-
-       /* not in virtualized environments */
-       if (k8_northbridges.num == 0)
+       /* only for L3, and not in virtualized environments */
+       if (index < 3 || amd_nb_num() == 0)
                return;
 
        /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
         * never freed but this is done only on shutdown so it doesn't matter.
         */
        if (!l3_caches) {
-               int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
+               int size = amd_nb_num() * sizeof(struct amd_l3_cache);
 
                l3_caches = kzalloc(size, GFP_ATOMIC);
                if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
 
        node = amd_get_nb_id(smp_processor_id());
 
-       if (!l3_caches[node]) {
-               l3_caches[node] = amd_init_l3_cache(node);
-               l3_caches[node]->can_disable = true;
+       if (!l3_caches[node].nb) {
+               l3_caches[node].nb = node_to_amd_nb(node);
+               amd_calc_l3_indices(&l3_caches[node]);
        }
 
-       WARN_ON(!l3_caches[node]);
-
-       this_leaf->l3 = l3_caches[node];
+       this_leaf->l3 = &l3_caches[node];
 }
 
 /*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
 {
        unsigned int reg = 0;
 
-       pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
+       pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
 
        /* check whether this slot is activated already */
        if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 {
        int index;
 
-       if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;
 
        index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
                if (!l3->subcaches[i])
                        continue;
 
-               pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+               pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 
                /*
                 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
                wbinvd_on_cpu(cpu);
 
                reg |= BIT(31);
-               pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+               pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
        }
 }
 
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;
 
        cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 #define STORE_CACHE_DISABLE(slot)                                      \
 static ssize_t                                                         \
 store_cache_disable_##slot(struct _cpuid4_info *this_leaf,             \
-                           const char *buf, size_t count)              \
+                          const char *buf, size_t count)               \
 {                                                                      \
        return store_cache_disable(this_leaf, buf, count, slot);        \
 }
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
                show_cache_disable_1, store_cache_disable_1);
 
 #else  /* CONFIG_AMD_NB */
-static void __cpuinit
-amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
-{
-};
+#define amd_init_l3_cache(x, y)
 #endif /* CONFIG_AMD_NB */
 
 static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                amd_cpuid4(index, &eax, &ebx, &ecx);
-               amd_check_l3_disable(this_leaf, index);
+               amd_init_l3_cache(this_leaf, index);
        } else {
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }
@@ -983,30 +944,48 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-#define DEFAULT_SYSFS_CACHE_ATTRS      \
-       &type.attr,                     \
-       &level.attr,                    \
-       &coherency_line_size.attr,      \
-       &physical_line_partition.attr,  \
-       &ways_of_associativity.attr,    \
-       &number_of_sets.attr,           \
-       &size.attr,                     \
-       &shared_cpu_map.attr,           \
-       &shared_cpu_list.attr
-
 static struct attribute *default_attrs[] = {
-       DEFAULT_SYSFS_CACHE_ATTRS,
+       &type.attr,
+       &level.attr,
+       &coherency_line_size.attr,
+       &physical_line_partition.attr,
+       &ways_of_associativity.attr,
+       &number_of_sets.attr,
+       &size.attr,
+       &shared_cpu_map.attr,
+       &shared_cpu_list.attr,
        NULL
 };
 
-static struct attribute *default_l3_attrs[] = {
-       DEFAULT_SYSFS_CACHE_ATTRS,
 #ifdef CONFIG_AMD_NB
-       &cache_disable_0.attr,
-       &cache_disable_1.attr,
+static struct attribute ** __cpuinit amd_l3_attrs(void)
+{
+       static struct attribute **attrs;
+       int n;
+
+       if (attrs)
+               return attrs;
+
+       n = sizeof (default_attrs) / sizeof (struct attribute *);
+
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+               n += 2;
+
+       attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
+       if (attrs == NULL)
+               return attrs = default_attrs;
+
+       for (n = 0; default_attrs[n]; n++)
+               attrs[n] = default_attrs[n];
+
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+               attrs[n++] = &cache_disable_0.attr;
+               attrs[n++] = &cache_disable_1.attr;
+       }
+
+       return attrs;
+}
 #endif
-       NULL
-};
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
                this_leaf = CPUID4_INFO_IDX(cpu, i);
 
-               if (this_leaf->l3 && this_leaf->l3->can_disable)
-                       ktype_cache.default_attrs = default_l3_attrs;
-               else
-                       ktype_cache.default_attrs = default_attrs;
-
+               ktype_cache.default_attrs = default_attrs;
+#ifdef CONFIG_AMD_NB
+               if (this_leaf->l3)
+                       ktype_cache.default_attrs = amd_l3_attrs();
+#endif
                retval = kobject_init_and_add(&(this_object->kobj),
                                              &ktype_cache,
                                              per_cpu(ici_cache_kobject, cpu),
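/*
 * Illustrative sketch, not part of the patch: the pattern amd_l3_attrs()
 * above uses, reduced to plain C. A NULL-terminated array is built once,
 * copying the common entries and appending optional ones behind a feature
 * check; build_attrs(), base[] and extra[] are hypothetical stand-ins for
 * amd_l3_attrs(), default_attrs[] and the cache_disable_* attributes.
 */
#include <stdlib.h>
#include <string.h>

static const char **build_attrs(const char **base, size_t nbase,
                                const char **extra, size_t nextra,
                                int has_extra)
{
        size_t n = nbase + (has_extra ? nextra : 0) + 1; /* +1 for NULL */
        const char **attrs = calloc(n, sizeof(*attrs));

        if (!attrs)
                return base;            /* fall back to the common set */

        memcpy(attrs, base, nbase * sizeof(*attrs));
        if (has_extra)
                memcpy(attrs + nbase, extra, nextra * sizeof(*attrs));
        return attrs;                   /* calloc left the NULL terminator */
}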
index 80c482382d5c95e06b71ffdf91b6f5d362bf2b45..5bf2fac52aca7771b6b7827117b9d2b2778fd8ad 100644 (file)
@@ -31,8 +31,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
-#define PFX               "mce_threshold: "
-#define VERSION           "version 1.1.1"
 #define NR_BANKS          6
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
        struct list_head        miscj;
 };
 
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
-       .interrupt_enable       = 0,
-       .threshold_limit        = THRESHOLD_MAX,
-};
-
 struct threshold_bank {
        struct kobject          *kobj;
        struct threshold_block  *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
 struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
+       int                     set_lvt_off;
+       int                     lvt_off;
        u16                     old_limit;
 };
 
+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+       int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+       if (apic < 0) {
+               pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+                      "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+                      b->bank, b->block, b->address, hi, lo);
+               return 0;
+       }
+
+       if (apic != msr) {
+               pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+                      "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+                      b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+               return 0;
+       }
+
+       return 1;
+}
+
 /* must be called with correct cpu affinity */
 /* Called via smp_call_function_single() */
 static void threshold_restart_bank(void *_tr)
 {
        struct thresh_restart *tr = _tr;
-       u32 mci_misc_hi, mci_misc_lo;
+       u32 hi, lo;
 
-       rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+       rdmsr(tr->b->address, lo, hi);
 
-       if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+       if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1;  /* limit cannot be lower than err count */
 
        if (tr->reset) {                /* reset err count and overflow bit */
-               mci_misc_hi =
-                   (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+               hi =
+                   (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) {     /* change limit w/o reset */
-               int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+               int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);
 
-               mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+               hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }
 
+       if (tr->set_lvt_off) {
+               if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+                       /* set new lvt offset */
+                       hi &= ~MASK_LVTOFF_HI;
+                       hi |= tr->lvt_off << 20;
+               }
+       }
+
        tr->b->interrupt_enable ?
-           (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
-           (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+           (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+           (hi &= ~MASK_INT_TYPE_HI);
 
-       mci_misc_hi |= MASK_COUNT_EN_HI;
-       wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+       hi |= MASK_COUNT_EN_HI;
+       wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+       struct thresh_restart tr = {
+               .b                      = b,
+               .set_lvt_off            = 1,
+               .lvt_off                = offset,
+       };
+
+       b->threshold_limit              = THRESHOLD_MAX;
+       threshold_restart_bank(&tr);
+}
+
+static int setup_APIC_mce(int reserved, int new)
+{
+       if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+                                             APIC_EILVT_MSG_FIX, 0))
+               return new;
+
+       return reserved;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+       struct threshold_block b;
        unsigned int cpu = smp_processor_id();
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
-       struct thresh_restart tr;
-       int lvt_off = -1;
-       u8 offset;
+       int offset = -1;
 
        for (bank = 0; bank < NR_BANKS; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
                        if (shared_bank[bank] && c->cpu_core_id)
                                break;
 #endif
-                       offset = (high & MASK_LVTOFF_HI) >> 20;
-                       if (lvt_off < 0) {
-                               if (setup_APIC_eilvt(offset,
-                                                    THRESHOLD_APIC_VECTOR,
-                                                    APIC_EILVT_MSG_FIX, 0)) {
-                                       pr_err(FW_BUG "cpu %d, failed to "
-                                              "setup threshold interrupt "
-                                              "for bank %d, block %d "
-                                              "(MSR%08X=0x%x%08x)",
-                                              smp_processor_id(), bank, block,
-                                              address, high, low);
-                                       continue;
-                               }
-                               lvt_off = offset;
-                       } else if (lvt_off != offset) {
-                               pr_err(FW_BUG "cpu %d, invalid threshold "
-                                      "interrupt offset %d for bank %d,"
-                                      "block %d (MSR%08X=0x%x%08x)",
-                                      smp_processor_id(), lvt_off, bank,
-                                      block, address, high, low);
-                               continue;
-                       }
-
-                       high &= ~MASK_LVTOFF_HI;
-                       high |= lvt_off << 20;
-                       wrmsr(address, low, high);
+                       offset = setup_APIC_mce(offset,
+                                               (high & MASK_LVTOFF_HI) >> 20);
 
-                       threshold_defaults.address = address;
-                       tr.b = &threshold_defaults;
-                       tr.reset = 0;
-                       tr.old_limit = 0;
-                       threshold_restart_bank(&tr);
+                       memset(&b, 0, sizeof(b));
+                       b.cpu           = cpu;
+                       b.bank          = bank;
+                       b.block         = block;
+                       b.address       = address;
 
+                       mce_threshold_block_init(&b, offset);
                        mce_threshold_vector = amd_threshold_interrupt;
                }
        }
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 
        b->interrupt_enable = !!new;
 
+       memset(&tr, 0, sizeof(tr));
        tr.b            = b;
-       tr.reset        = 0;
-       tr.old_limit    = 0;
 
        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
        if (new < 1)
                new = 1;
 
+       memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;
-       tr.reset = 0;
 
        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
-                       goto out;
+                       return err;
        }
-out:
+
        return err;
 }
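/*
 * Illustrative sketch, not part of the patch: the initialization pattern the
 * hunks above converge on. Rather than clearing struct thresh_restart field
 * by field (tr.reset = 0; tr.old_limit = 0; ...), the struct is zeroed
 * wholesale first, so members added later (set_lvt_off, lvt_off) can never be
 * read uninitialized. restart_req and prepare_restart() are hypothetical
 * stand-ins.
 */
#include <string.h>

struct restart_req {
        void    *block;
        int     reset;
        int     set_lvt_off;
        int     lvt_off;
};

static void prepare_restart(struct restart_req *req, void *block)
{
        memset(req, 0, sizeof(*req));   /* everything off by default */
        req->block = block;             /* then set only what matters */
}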
 
index 4b683267eca5fb982111375ab9e000d7eda3b437..e12246ff5aa6d2683ae8537a92da76e479a26bf7 100644 (file)
@@ -53,8 +53,13 @@ struct thermal_state {
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
+       struct _thermal_state core_thresh0;
+       struct _thermal_state core_thresh1;
 };
 
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
        return 0;
 }
 
+static int thresh_event_valid(int event)
+{
+       struct _thermal_state *state;
+       unsigned int this_cpu = smp_processor_id();
+       struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+       u64 now = get_jiffies_64();
+
+       state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+       if (time_before64(now, state->next_check))
+               return 0;
+
+       state->next_check = now + CHECK_INTERVAL;
+       return 1;
+}
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
 #define PACKAGE_THROTTLED      ((__u64)2 << 62)
 #define PACKAGE_POWER_LIMIT    ((__u64)3 << 62)
 
+static void notify_thresholds(__u64 msr_val)
+{
+       /*
+        * Check whether the interrupt handler is defined;
+        * otherwise simply return.
+        */
+       if (!platform_thermal_notify)
+               return;
+
+       /* lower threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+               platform_thermal_notify(msr_val);
+       /* higher threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+               platform_thermal_notify(msr_val);
+}
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
 
        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
+       /* Check for violation of core thermal thresholds */
+       notify_thresholds(msr_val);
+
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
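/*
 * Illustrative sketch, not part of the patch: how a consumer might hook the
 * core-threshold notification added above. platform_thermal_notify is a
 * plain global function pointer, so a platform driver would simply assign it
 * (assuming a declaration is visible in a shared header); my_notify() and
 * my_driver_init() are hypothetical. The callback is invoked from the
 * thermal interrupt path with the raw IA32_THERM_STATUS value, rate-limited
 * by thresh_event_valid().
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>

extern int (*platform_thermal_notify)(__u64 msr_val); /* assumed declaration */

static int my_notify(__u64 msr_val)
{
        pr_info("core thermal threshold crossed, status 0x%llx\n",
                (unsigned long long)msr_val);
        return 0;
}

static int __init my_driver_init(void)
{
        platform_thermal_notify = my_notify;
        return 0;
}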
index 6d75b9145b13f0e68a106acd76b0d458c827d099..0a360d146596b6d01f8c833e655150faa3ae9605 100644 (file)
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
 {
        int i;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               disable_lapic_nmi_watchdog();
-
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                        goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
        for (i--; i >= 0; i--)
                release_perfctr_nmi(x86_pmu.perfctr + i);
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
-
        return false;
 }
 
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
                release_perfctr_nmi(x86_pmu.perfctr + i);
                release_evntsel_nmi(x86_pmu.eventsel + i);
        }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
 }
 
 #else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
 static bool check_hw_exists(void)
 {
        u64 val, val_new = 0;
-       int ret = 0;
+       int i, reg, ret = 0;
+
+       /*
+        * Check to see if the BIOS enabled any of the counters; if so,
+        * complain and bail.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu.eventsel + i;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+                       goto bios_fail;
+       }
 
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+                       if (val & (0x03 << i*4))
+                               goto bios_fail;
+               }
+       }
+
+       /*
+        * Now write a value and read it back to see if it matches;
+        * this is needed to detect certain hardware emulators (qemu/kvm)
+        * that don't trap on the MSR access and always return 0s.
+        */
        val = 0xabcdUL;
-       ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+       ret = checking_wrmsrl(x86_pmu.perfctr, val);
        ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
        if (ret || val != val_new)
-               return false;
+               goto msr_fail;
 
        return true;
+
+bios_fail:
+       printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+       printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+       return false;
+
+msr_fail:
+       printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+       return false;
 }
 
 static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        u64 config;
 
-       if (!hwc->sample_period) {
+       if (!is_sampling_event(event)) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
        pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        struct event_constraint *c;
        int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
                err = amd_pmu_init();
                break;
        default:
-               return;
+               return 0;
        }
        if (err != 0) {
                pr_cont("no PMU driver, software events only.\n");
-               return;
+               return 0;
        }
 
        pmu_check_apic();
 
        /* sanity check that the hardware exists or is emulated */
-       if (!check_hw_exists()) {
-               pr_cont("Broken PMU hardware detected, software events only.\n");
-               return;
-       }
+       if (!check_hw_exists())
+               return 0;
 
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(x86_pmu_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        perf_callchain_store(entry, regs->ip);
 
-       dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+       dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
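/*
 * Illustrative sketch, not part of the patch: the emulator probe that
 * check_hw_exists() gained above, in isolation. A recognizable value is
 * written to a counter MSR and read back; hypervisors without a virtual PMU
 * (e.g. qemu/kvm) silently ignore the write and return 0, so a mismatch
 * means the PMU isn't really there. pmu_really_exists() is a hypothetical
 * wrapper around the same checking_wrmsrl()/rdmsrl_safe() calls the patch
 * uses.
 */
#include <linux/types.h>
#include <asm/msr.h>

static bool pmu_really_exists(unsigned int msr)
{
        u64 val = 0xabcdUL, val_new = 0;

        if (checking_wrmsrl(msr, val))  /* faulted: MSR not implemented */
                return false;
        if (rdmsrl_safe(msr, &val_new))
                return false;
        return val == val_new;          /* 0 read back => emulated */
}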
 
 #ifdef CONFIG_COMPAT
index e421b8cd6944af860c4b28a1a176a14320ac9f79..67e2202a60393cd48a0f2251862c59f65bf7e667 100644 (file)
@@ -1,7 +1,5 @@
 #ifdef CONFIG_CPU_SUP_AMD
 
-static DEFINE_RAW_SPINLOCK(amd_nb_lock);
-
 static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
        return &emptyconstraint;
 }
 
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+static struct amd_nb *amd_alloc_nb(int cpu)
 {
        struct amd_nb *nb;
        int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
        if (!nb)
                return NULL;
 
-       nb->nb_id = nb_id;
+       nb->nb_id = -1;
 
        /*
         * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
        if (boot_cpu_data.x86_max_cores < 2)
                return NOTIFY_OK;
 
-       cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+       cpuc->amd_nb = amd_alloc_nb(cpu);
        if (!cpuc->amd_nb)
                return NOTIFY_BAD;
 
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
        nb_id = amd_get_nb_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);
 
-       raw_spin_lock(&amd_nb_lock);
-
        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
 
        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
        cpuhw = &per_cpu(cpu_hw_events, cpu);
 
-       raw_spin_lock(&amd_nb_lock);
-
        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;
 
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
                cpuhw->amd_nb = NULL;
        }
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static __initconst const struct x86_pmu amd_pmu = {
index c8f5c088cad11ae3f245e1e7374bb43c915170d6..24e390e40f2e0b484d4b2b09084deb9d120d59b4 100644 (file)
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
        if (ret)
                return ret;
 
+       if (event->attr.precise_ip &&
+           (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.ANY_P
+                * (0x00c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * INST_RETIRED.ANY_P counts the number of cycles that retire
+                * CNTMASK instructions. By setting CNTMASK to a value (16)
+                * larger than the maximum number of instructions that can be
+                * retired per cycle (4) and then inverting the condition, we
+                * count all cycles that retire 16 or fewer instructions, which
+                * is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+
        if (event->attr.type != PERF_TYPE_RAW)
                return 0;
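/*
 * Note, not part of the patch: decoding of the 0x108000c0 encoding used
 * above, per the IA32_PERFEVTSELx layout (event select in bits 7:0, INV in
 * bit 23, CMASK in bits 31:24):
 *
 *      event select = 0xc0                    INST_RETIRED.ANY_P
 *      INV          = 1  << 23  = 0x00800000
 *      CMASK        = 16 << 24  = 0x10000000
 *                                 ----------
 *      combined                 = 0x108000c0  "INST_RETIRED.TOTAL_CYCLES"
 */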
 
index d9f4ff8fcd693c509b2d079b381a8e8683d10d9c..d5a236615501fd6a41fb6f6bc76bfd2369a47bc5 100644 (file)
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-       unsigned int cccr_msr;
-       unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
-       unsigned int evntsel_msr;  /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-       int (*reserve)(void);
-       void (*unreserve)(void);
-       int (*setup)(unsigned nmi_hz);
-       void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-       void (*stop)(void);
-       unsigned perfctr;
-       unsigned evntsel;
-       u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
        clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       if (atomic_read(&nmi_active) <= 0)
-               return;
-
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
-       if (wd_ops)
-               wd_ops->unreserve();
-
-       BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       /* are we already enabled */
-       if (atomic_read(&nmi_active) != 0)
-               return;
-
-       /* are we lapic aware */
-       if (!wd_ops)
-               return;
-       if (!wd_ops->reserve()) {
-               printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
-               return;
-       }
-
-       on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
-       u64 counter_val;
-       unsigned int retval = hz;
-
-       /*
-        * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
-        * are writable, with higher bits sign extending from bit 31.
-        * So we can only program the counter with 31 bit values; bit 31
-        * must be set so that bits 32..63 sign-extend to 1.
-        * Find the appropriate nmi_hz
-        */
-       counter_val = (u64)cpu_khz * 1000;
-       do_div(counter_val, retval);
-       if (counter_val > 0x7fffffffULL) {
-               u64 count = (u64)cpu_khz * 1000;
-               do_div(count, 0x7fffffffUL);
-               retval = count + 1;
-       }
-       return retval;
-}
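/*
 * Worked example, not part of the patch, of the clamping performed by
 * adjust_for_32bit_ctr() above for a 3 GHz CPU (cpu_khz = 3000000) asked
 * for nmi_hz = 1:
 *
 *      counter_val = 3000000000 / 1 = 3000000000  >  0x7fffffff
 *      retval      = 3000000000 / 0x7fffffff + 1  =  2
 *
 * so the watchdog is rearmed at 2 Hz instead, keeping the programmed count
 * within the writable 31 bits.
 */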
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE      (1 << 22)
-#define K7_EVNTSEL_INT         (1 << 20)
-#define K7_EVNTSEL_OS          (1 << 17)
-#define K7_EVNTSEL_USR         (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
-#define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = K7_EVNTSEL_INT
-               | K7_EVNTSEL_OS
-               | K7_EVNTSEL_USR
-               | K7_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= K7_EVNTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
-       if (!reserve_perfctr_nmi(wd_ops->perfctr))
-               return 0;
-
-       if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
-               release_perfctr_nmi(wd_ops->perfctr);
-               return 0;
-       }
-       return 1;
-}
-
-static void single_msr_unreserve(void)
-{
-       release_evntsel_nmi(wd_ops->evntsel);
-       release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_k7_watchdog,
-       .rearm          = single_msr_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_K7_PERFCTR0,
-       .evntsel        = MSR_K7_EVNTSEL0,
-       .checkbit       = 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE     (1 << 22)
-#define P6_EVNTSEL_INT         (1 << 20)
-#define P6_EVNTSEL_OS          (1 << 17)
-#define P6_EVNTSEL_USR         (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       /* KVM doesn't implement this MSR */
-       if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
-               return 0;
-
-       evntsel = P6_EVNTSEL_INT
-               | P6_EVNTSEL_OS
-               | P6_EVNTSEL_USR
-               | P6_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= P6_EVNTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /*
-        * P6-based Pentium M needs to re-unmask
-        * the APIC vector, but it doesn't hurt
-        * other P6 variants.
-        * ArchPerfmon/Core Duo also needs this.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       /* P6/ARCH_PERFMON has 32 bit counter write */
-       write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_p6_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_P6_PERFCTR0,
-       .evntsel        = MSR_P6_EVNTSEL0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1 << 7)
-#define P4_ESCR_EVENT_SELECT(N)        ((N) << 25)
-#define P4_ESCR_OS             (1 << 3)
-#define P4_ESCR_USR            (1 << 2)
-#define P4_CCCR_OVF_PMI0       (1 << 26)
-#define P4_CCCR_OVF_PMI1       (1 << 27)
-#define P4_CCCR_THRESHOLD(N)   ((N) << 20)
-#define P4_CCCR_COMPLEMENT     (1 << 19)
-#define P4_CCCR_COMPARE                (1 << 18)
-#define P4_CCCR_REQUIRED       (3 << 16)
-#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
-#define P4_CCCR_ENABLE         (1 << 12)
-#define P4_CCCR_OVF            (1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
-       MSR_P4_BPU_CCCR0,
-       MSR_P4_BPU_CCCR1,
-       MSR_P4_BPU_CCCR2,
-       MSR_P4_BPU_CCCR3,
-       MSR_P4_MS_CCCR0,
-       MSR_P4_MS_CCCR1,
-       MSR_P4_MS_CCCR2,
-       MSR_P4_MS_CCCR3,
-       MSR_P4_FLAME_CCCR0,
-       MSR_P4_FLAME_CCCR1,
-       MSR_P4_FLAME_CCCR2,
-       MSR_P4_FLAME_CCCR3,
-       MSR_P4_IQ_CCCR0,
-       MSR_P4_IQ_CCCR1,
-       MSR_P4_IQ_CCCR2,
-       MSR_P4_IQ_CCCR3,
-       MSR_P4_IQ_CCCR4,
-       MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr, cccr_msr;
-       unsigned int evntsel, cccr_val;
-       unsigned int misc_enable, dummy;
-       unsigned int ht_num;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
-       if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
-               return 0;
-
-#ifdef CONFIG_SMP
-       /* detect which hyperthread we are on */
-       if (smp_num_siblings == 2) {
-               unsigned int ebx, apicid;
-
-               ebx = cpuid_ebx(1);
-               apicid = (ebx >> 24) & 0xff;
-               ht_num = apicid & 1;
-       } else
-#endif
-               ht_num = 0;
-
-       /*
-        * performance counters are shared resources;
-        * assign each hyperthread its own set
-        * (re-using the ESCR0 register seems safe
-        * and keeps the cccr_val the same)
-        */
-       if (!ht_num) {
-               /* logical cpu 0 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR0;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR0;
-               cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
-               /*
-                * If we're on the kdump kernel or other situation, we may
-                * still have other performance counter registers set to
-                * interrupt and they'll keep interrupting forever because
-                * of the P4_CCCR_OVF quirk. So we need to ACK all the
-                * pending interrupts and disable all the registers here,
-                * before reenabling the NMI delivery. Refer to p4_rearm()
-                * about the P4_CCCR_OVF quirk.
-                */
-               if (reset_devices) {
-                       unsigned int low, high;
-                       int i;
-
-                       for (i = 0; i < P4_CONTROLS; i++) {
-                               rdmsr(p4_controls[i], low, high);
-                               low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
-                               wrmsr(p4_controls[i], low, high);
-                       }
-               }
-       } else {
-               /* logical cpu 1 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR1;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR1;
-
-               /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
-               if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
-                       cccr_val = P4_CCCR_OVF_PMI0;
-               else
-                       cccr_val = P4_CCCR_OVF_PMI1;
-               cccr_val |= P4_CCCR_ESCR_SELECT(4);
-       }
-
-       evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-               | P4_ESCR_OS
-               | P4_ESCR_USR;
-
-       cccr_val |= P4_CCCR_THRESHOLD(15)
-                | P4_CCCR_COMPLEMENT
-                | P4_CCCR_COMPARE
-                | P4_CCCR_REQUIRED;
-
-       wrmsr(evntsel_msr, evntsel, 0);
-       wrmsr(cccr_msr, cccr_val, 0);
-       write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = cccr_msr;
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       cccr_val |= P4_CCCR_ENABLE;
-       wrmsr(cccr_msr, cccr_val, 0);
-       return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       wrmsr(wd->cccr_msr, 0, 0);
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
-       if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
-               return 0;
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
-               goto fail1;
-#endif
-       if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
-               goto fail2;
-       /* RED-PEN why is ESCR1 not reserved here? */
-       return 1;
- fail2:
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-       return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
-       release_evntsel_nmi(MSR_P4_CRU_ESCR0);
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       unsigned dummy;
-       /*
-        * P4 quirks:
-        * - An overflown perfctr will assert its interrupt
-        *   until the OVF flag in its CCCR is cleared.
-        * - LVTPC is masked on interrupt and must be
-        *   unmasked by the LVTPC handler.
-        */
-       rdmsrl(wd->cccr_msr, dummy);
-       dummy &= ~P4_CCCR_OVF;
-       wrmsrl(wd->cccr_msr, dummy);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
-       .reserve        = p4_reserve,
-       .unreserve      = p4_unreserve,
-       .setup          = setup_p4_watchdog,
-       .rearm          = p4_rearm,
-       .stop           = stop_p4_watchdog,
-       /* RED-PEN this is wrong for the other sibling */
-       .perfctr        = MSR_P4_BPU_PERFCTR0,
-       .evntsel        = MSR_P4_BSU_ESCR0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
-       unsigned int ebx;
-       union cpuid10_eax eax;
-       unsigned int unused;
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       /*
-        * Check whether the Architectural PerfMon supports the
-        * Unhalted Core Cycles event.
-        * NOTE: a 0 in the corresponding ebx bit means the event is present.
-        */
-       cpuid(10, &(eax.full), &ebx, &unused, &unused);
-       if ((eax.split.mask_length <
-                       (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
-           (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-               return 0;
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = ARCH_PERFMON_EVENTSEL_INT
-               | ARCH_PERFMON_EVENTSEL_OS
-               | ARCH_PERFMON_EVENTSEL_USR
-               | ARCH_PERFMON_NMI_EVENT_SEL
-               | ARCH_PERFMON_NMI_EVENT_UMASK;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-       intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
-       return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_intel_arch_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
-       .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
-};
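The setup path above keys off CPUID leaf 0xA (architectural performance
monitoring): EAX[31:24] gives the number of valid event bits in EBX, and a
*clear* EBX bit means the corresponding event is present. A minimal C sketch
of that probe, using the standard kernel cpuid() helper and the
ARCH_PERFMON_* constants referenced above:

/* Sketch of the leaf-0xA check done in setup_intel_arch_watchdog(). */
static int arch_perfmon_has_unhalted_core_cycles(void)
{
        unsigned int eax, ebx, ecx, edx;

        cpuid(0xA, &eax, &ebx, &ecx, &edx);
        if (((eax >> 24) & 0xff) <= ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)
                return 0;       /* leaf too short to describe the event */
        return !(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT);
}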
-
-static void probe_nmi_watchdog(void)
-{
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_AMD:
-               if (boot_cpu_data.x86 == 6 ||
-                   (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
-                       wd_ops = &k7_wd_ops;
-               return;
-       case X86_VENDOR_INTEL:
-               /* Work around CPUs on which perfctr1 doesn't have a working
-                * enable bit, as described in the following errata:
-                * AE49 Core Duo and Intel Core Solo 65 nm
-                * AN49 Intel Pentium Dual-Core
-                * AF49 Dual-Core Intel Xeon Processor LV
-                */
-               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
-                   ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
-                    boot_cpu_data.x86_mask == 4))) {
-                       intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
-                       intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
-               }
-               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-                       wd_ops = &intel_arch_wd_ops;
-                       break;
-               }
-               switch (boot_cpu_data.x86) {
-               case 6:
-                       if (boot_cpu_data.x86_model > 13)
-                               return;
-
-                       wd_ops = &p6_wd_ops;
-                       break;
-               case 15:
-                       wd_ops = &p4_wd_ops;
-                       break;
-               default:
-                       return;
-               }
-               break;
-       }
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
-       if (!wd_ops) {
-               probe_nmi_watchdog();
-               if (!wd_ops) {
-                       printk(KERN_INFO "NMI watchdog: CPU not supported\n");
-                       return -1;
-               }
-
-               if (!wd_ops->reserve()) {
-                       printk(KERN_ERR
-                               "NMI watchdog: cannot reserve perfctrs\n");
-                       return -1;
-               }
-       }
-
-       if (!(wd_ops->setup(nmi_hz))) {
-               printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
-                      raw_smp_processor_id());
-               return -1;
-       }
-
-       return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
-       if (wd_ops)
-               wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
-           wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
-               hz = adjust_for_32bit_ctr(hz);
-       return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       u64 ctr;
-
-       rdmsrl(wd->perfctr_msr, ctr);
-       if (ctr & wd_ops->checkbit) /* perfctr still running? */
-               return 0;
-
-       wd_ops->rearm(wd, nmi_hz);
-       return 1;
-}
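For context on the interface being removed: the generic NMI code polled
lapic_wd_event() from its NMI handler, treating a nonzero return as "this
NMI belonged to the watchdog". A heavily simplified, hedged sketch of that
calling pattern (the real nmi_watchdog_tick() also did per-CPU lockup
detection):

static int nmi_watchdog_tick_sketch(struct pt_regs *regs, unsigned nmi_hz)
{
        if (!lapic_wd_event(nmi_hz))
                return 0;       /* not the watchdog's NMI */
        /* ... lockup detection would run here ... */
        return 1;               /* handled */
}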
index 6e8752c1bd5241fc9e7e63ee088f06c84d0526fb..8474c998cbd40d2f3481f87879f5a42d1f105e52 100644 (file)
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl)
+               unsigned long *stack, char *log_lvl)
 {
        printk("%sCall Trace:\n", log_lvl);
-       dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+       dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp)
+               unsigned long *stack)
 {
-       show_trace_log_lvl(task, regs, stack, bp, "");
+       show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-       show_stack_log_lvl(task, NULL, sp, 0, "");
+       show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@ void dump_stack(void)
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
-       show_trace(NULL, NULL, &stack, bp);
+       show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
index 1bc7f75a5bdaf823999f7b90271808819b6f3ad3..74cc1eda384b8d26437a10fa55e3f64cfcf4545a 100644 (file)
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        for (;;) {
                struct thread_info *context;
 
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *stack;
        int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                touch_nmi_watchdog();
        }
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
                u8 *ip;
 
                printk(KERN_EMERG "Stack:\n");
-               show_stack_log_lvl(NULL, regs, &regs->sp,
-                               0, KERN_EMERG);
+               show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 6a340485249a965f29686ac84117458d6cb72be6..64101335de19aad09ec03d75dea8260b28051747 100644 (file)
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
        unsigned used = 0;
        struct thread_info *tinfo;
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        /*
         * Print function call entries in all stacks, starting at the
         * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *irq_stack_end;
        unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        preempt_enable();
 
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
 
                printk(KERN_EMERG "Stack:\n");
                show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-                               regs->bp, KERN_EMERG);
+                                  KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 4572f25f93255f8bb4a5e5158d3c9949f912b657..cd28a350f7f933162a6fa9bd0de08c64e22495f2 100644 (file)
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
                if (!strncmp(buf, "xen", 3))
                        early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+#ifdef CONFIG_EARLY_PRINTK_MRST
                if (!strncmp(buf, "mrst", 4)) {
                        mrst_early_console_init();
                        early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
                        hsu_early_console_init();
                        early_console_register(&early_hsu_console, keep);
                }
-
 #endif
                buf++;
        }
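Both branches follow the same shape: probe the hardware, then hand a struct
console to early_console_register(). That helper is not shown in this hunk;
a hedged sketch of what it is assumed to do, judging only from the call
sites above:

/* Assumed behavior of early_console_register(); not verified here. */
static void early_console_register(struct console *con, int keep_early)
{
        early_console = con;
        if (keep_early)
                early_console->flags &= ~CON_BOOT;  /* keep past real console */
        else
                early_console->flags |= CON_BOOT;   /* auto-unregister later */
        register_console(early_console);
}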
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c
deleted file mode 100644 (file)
index 65df603..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * early_printk_mrst.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements two early consoles named mrst and hsu.
- * mrst is based on the MAX3110 SPI-UART device, which exists on both
- * the Moorestown and Medfield platforms, while hsu is based on a High
- * Speed UART device that exists only on the Medfield platform.
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/kmsg_dump.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/mrst.h>
-
-#define MRST_SPI_TIMEOUT               0x200000
-#define MRST_REGBASE_SPI0              0xff128000
-#define MRST_REGBASE_SPI1              0xff128400
-#define MRST_CLK_SPI0_REG              0xff11d86c
-
-/* Bit fields in CTRLR0 */
-#define SPI_DFS_OFFSET                 0
-
-#define SPI_FRF_OFFSET                 4
-#define SPI_FRF_SPI                    0x0
-#define SPI_FRF_SSP                    0x1
-#define SPI_FRF_MICROWIRE              0x2
-#define SPI_FRF_RESV                   0x3
-
-#define SPI_MODE_OFFSET                        6
-#define SPI_SCPH_OFFSET                        6
-#define SPI_SCOL_OFFSET                        7
-#define SPI_TMOD_OFFSET                        8
-#define        SPI_TMOD_TR                     0x0             /* xmit & recv */
-#define SPI_TMOD_TO                    0x1             /* xmit only */
-#define SPI_TMOD_RO                    0x2             /* recv only */
-#define SPI_TMOD_EPROMREAD             0x3             /* eeprom read mode */
-
-#define SPI_SLVOE_OFFSET               10
-#define SPI_SRL_OFFSET                 11
-#define SPI_CFS_OFFSET                 12
-
-/* Bit fields in SR, 7 bits */
-#define SR_MASK                                0x7f            /* cover 7 bits */
-#define SR_BUSY                                (1 << 0)
-#define SR_TF_NOT_FULL                 (1 << 1)
-#define SR_TF_EMPT                     (1 << 2)
-#define SR_RF_NOT_EMPT                 (1 << 3)
-#define SR_RF_FULL                     (1 << 4)
-#define SR_TX_ERR                      (1 << 5)
-#define SR_DCOL                                (1 << 6)
-
-struct dw_spi_reg {
-       u32     ctrl0;
-       u32     ctrl1;
-       u32     ssienr;
-       u32     mwcr;
-       u32     ser;
-       u32     baudr;
-       u32     txfltr;
-       u32     rxfltr;
-       u32     txflr;
-       u32     rxflr;
-       u32     sr;
-       u32     imr;
-       u32     isr;
-       u32     risr;
-       u32     txoicr;
-       u32     rxoicr;
-       u32     rxuicr;
-       u32     msticr;
-       u32     icr;
-       u32     dmacr;
-       u32     dmatdlr;
-       u32     dmardlr;
-       u32     idr;
-       u32     version;
-
-       /* Currently operates as 32 bits, though only the low 16 bits matter */
-       u32     dr;
-} __packed;
-
-#define dw_readl(dw, name)             __raw_readl(&(dw)->name)
-#define dw_writel(dw, name, val)       __raw_writel((val), &(dw)->name)
-
-/* By default use the SPI0 registers for mrst; we will detect Penwell and use SPI1 */
-static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
-
-static u32 *pclk_spi0;
-/* Always contains an accessible address; start with 0 */
-static struct dw_spi_reg *pspi;
-
-static struct kmsg_dumper dw_dumper;
-static int dumper_registered;
-
-static void dw_kmsg_dump(struct kmsg_dumper *dumper,
-                       enum kmsg_dump_reason reason,
-                       const char *s1, unsigned long l1,
-                       const char *s2, unsigned long l2)
-{
-       int i;
-
-       /* By the time we get here, we'd better re-init the HW */
-       mrst_early_console_init();
-
-       for (i = 0; i < l1; i++)
-               early_mrst_console.write(&early_mrst_console, s1 + i, 1);
-       for (i = 0; i < l2; i++)
-               early_mrst_console.write(&early_mrst_console, s2 + i, 1);
-}
-
-/* Set the baud rate to 115200, 8n1, IRQs disabled */
-static void max3110_write_config(void)
-{
-       u16 config;
-
-       config = 0xc001;
-       dw_writel(pspi, dr, config);
-}
-
-/* Translate a char to an eligible word and send it to the max3110 */
-static void max3110_write_data(char c)
-{
-       u16 data;
-
-       data = 0x8000 | c;
-       dw_writel(pspi, dr, data);
-}
-
-void mrst_early_console_init(void)
-{
-       u32 ctrlr0 = 0;
-       u32 spi0_cdiv;
-       u32 freq; /* Frequency info only needs to be read once */
-
-       /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
-       pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
-                                                       MRST_CLK_SPI0_REG);
-       spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
-       freq = 100000000 / (spi0_cdiv + 1);
-
-       if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
-               mrst_spi_paddr = MRST_REGBASE_SPI1;
-
-       pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
-                                               mrst_spi_paddr);
-
-       /* Disable SPI controller */
-       dw_writel(pspi, ssienr, 0);
-
-       /* Set control param, 8 bits, transmit only mode */
-       ctrlr0 = dw_readl(pspi, ctrl0);
-
-       ctrlr0 &= 0xfcc0;
-       ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
-                     | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
-       dw_writel(pspi, ctrl0, ctrlr0);
-
-       /*
-        * Change the spi0 clock to support 115200 bps; use 100000 when
-        * calculating the clock divisor so that the clock runs a little
-        * slower than the real baud rate.
-        */
-       dw_writel(pspi, baudr, freq/100000);
-
-       /* Disable all INT for early phase */
-       dw_writel(pspi, imr, 0x0);
-
-       /* Set the cs to spi-uart */
-       dw_writel(pspi, ser, 0x2);
-
-       /* Enable the HW, the last step for HW init */
-       dw_writel(pspi, ssienr, 0x1);
-
-       /* Set the default configuration */
-       max3110_write_config();
-
-       /* Register the kmsg dumper */
-       if (!dumper_registered) {
-               dw_dumper.dump = dw_kmsg_dump;
-               kmsg_dump_register(&dw_dumper);
-               dumper_registered = 1;
-       }
-}
-
-/* Slave select should be called in the read/write function */
-static void early_mrst_spi_putc(char c)
-{
-       unsigned int timeout;
-       u32 sr;
-
-       timeout = MRST_SPI_TIMEOUT;
-       /* Early putc needs to make sure the TX FIFO is not full */
-       while (--timeout) {
-               sr = dw_readl(pspi, sr);
-               if (!(sr & SR_TF_NOT_FULL))
-                       cpu_relax();
-               else
-                       break;
-       }
-
-       if (!timeout)
-               pr_warning("MRST earlycon: timed out\n");
-       else
-               max3110_write_data(c);
-}
-
-/* Early SPI only uses polling mode */
-static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
-{
-       int i;
-
-       for (i = 0; i < n && *str; i++) {
-               if (*str == '\n')
-                       early_mrst_spi_putc('\r');
-               early_mrst_spi_putc(*str);
-               str++;
-       }
-}
-
-struct console early_mrst_console = {
-       .name =         "earlymrst",
-       .write =        early_mrst_spi_write,
-       .flags =        CON_PRINTBUFFER,
-       .index =        -1,
-};
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT2_PADDR                0xffa28180
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(void)
-{
-       u8 lcr;
-
-       phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
-                                                       HSU_PORT2_PADDR);
-
-       /* Disable FIFO */
-       writeb(0x0, phsu + UART_FCR);
-
-       /* Set to default 115200 bps, 8n1 */
-       lcr = readb(phsu + UART_LCR);
-       writeb((0x80 | lcr), phsu + UART_LCR);
-       writeb(0x18, phsu + UART_DLL);
-       writeb(lcr,  phsu + UART_LCR);
-       writel(0x3600, phsu + UART_MUL*4);
-
-       writeb(0x8, phsu + UART_MCR);
-       writeb(0x7, phsu + UART_FCR);
-       writeb(0x3, phsu + UART_LCR);
-
-       /* Clear IRQ status */
-       readb(phsu + UART_LSR);
-       readb(phsu + UART_RX);
-       readb(phsu + UART_IIR);
-       readb(phsu + UART_MSR);
-
-       /* Enable FIFO */
-       writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
-       unsigned int timeout = 10000; /* 10ms */
-       u8 status;
-
-       while (--timeout) {
-               status = readb(phsu + UART_LSR);
-               if (status & BOTH_EMPTY)
-                       break;
-               udelay(1);
-       }
-
-       /* Only write the char when there was no timeout */
-       if (timeout)
-               writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
-       int i;
-
-       for (i = 0; i < n && *str; i++) {
-               if (*str == '\n')
-                       early_hsu_putc('\r');
-               early_hsu_putc(*str);
-               str++;
-       }
-}
-
-struct console early_hsu_console = {
-       .name =         "earlyhsu",
-       .write =        early_hsu_write,
-       .flags =        CON_PRINTBUFFER,
-       .index =        -1,
-};
index 3afb33f14d2d2c86a3c961d87aaae531d2631ac8..298448656b6079d074232518cb16e50895b4a5b8 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>
 
 #include <trace/syscall.h>
 
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
 int ftrace_arch_code_modify_prepare(void)
 {
        set_kernel_text_rw();
+       set_all_modules_text_rw();
        modifying_code = 1;
        return 0;
 }
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
 int ftrace_arch_code_modify_post_process(void)
 {
        modifying_code = 0;
+       set_all_modules_text_ro();
        set_kernel_text_ro();
        return 0;
 }
index 763310165fa0d1b0e4bf1891632a729e289980e8..7f138b3c3c52cf2d6790d8308fdc9a6dae8b41b7 100644 (file)
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
        case X86_SUBARCH_MRST:
                x86_mrst_early_setup();
                break;
+       case X86_SUBARCH_CE4100:
+               x86_ce4100_early_setup();
+               break;
        default:
                i386_default_early_setup();
                break;
index c0dbd9ac24f0d5cf7e87f8f0439275656b877f73..5707fc8a7a4bcc8527178cb52ca38afad747ee5b 100644 (file)
@@ -316,6 +316,10 @@ ENTRY(startup_32_smp)
        subl $0x80000001, %eax
        cmpl $(0x8000ffff-0x80000001), %eax
        ja 6f
+
+       /* Clear bogus XD_DISABLE bits */
+       call verify_cpu
+
        mov $0x80000001, %eax
        cpuid
        /* Execute Disable bit supported? */
@@ -611,6 +615,8 @@ ignore_int:
 #endif
        iret
 
+#include "verify_cpu.S"
+
        __REFDATA
 .align 4
 ENTRY(initial_code)
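The verify_cpu call is placed before the extended-CPUID probe because, as
verify_cpu.S notes further down, it also clears Intel's XD_DISABLE bit in
IA32_MISC_ENABLE; while that bit is set, CPUID reports the NX bit as absent.
A rough C rendering of that side effect (MSR_IA32_MISC_ENABLE_XD_DISABLE is
assumed here as a name for bit 34):

u64 misc;

rdmsrl(MSR_IA32_MISC_ENABLE, misc);
if (misc & MSR_IA32_MISC_ENABLE_XD_DISABLE) {           /* bit 34 */
        misc &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
        wrmsrl(MSR_IA32_MISC_ENABLE, misc);             /* re-enable NX */
}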
index 1cbd54c0df99189548a3a03f40fbb75a1703475a..5940282bd2f94ed886226bc717c189e593adab50 100644 (file)
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+       /* This is possible if op is under delayed unoptimization */
+       if (kprobe_disabled(&op->kp))
+               return;
+
        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
        return 0;
 }
 
-/* Replace a breakpoint (int3) with a relative jump.  */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+       u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+                                           u8 *insn_buf,
+                                           struct optimized_kprobe *op)
 {
-       unsigned char jmp_code[RELATIVEJUMP_SIZE];
        s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));
 
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
        memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
               RELATIVE_ADDR_SIZE);
 
-       jmp_code[0] = RELATIVEJUMP_OPCODE;
-       *(s32 *)(&jmp_code[1]) = rel;
+       insn_buf[0] = RELATIVEJUMP_OPCODE;
+       *(s32 *)(&insn_buf[1]) = rel;
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must hold kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               WARN_ON(kprobe_disabled(&op->kp));
+               /* Setup param */
+               setup_optimize_kprobe(&jump_poke_params[c],
+                                     jump_poke_bufs[c].buf, op);
+               list_del_init(&op->list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
 
        /*
         * text_poke_smp doesn't support NMI/MCE code modifying.
         * However, since kprobes itself also doesn't support NMI/MCE
         * code probing, it's not a problem.
         */
-       text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
-       return 0;
+       text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+                                             u8 *insn_buf,
+                                             struct optimized_kprobe *op)
+{
+       /* Set int3 to first byte for kprobes */
+       insn_buf[0] = BREAKPOINT_INSTRUCTION;
+       memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must hold kprobe_mutex.
+ */
+void arch_unoptimize_kprobes(struct list_head *oplist,
+                            struct list_head *done_list)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               /* Setup param */
+               setup_unoptimize_kprobe(&jump_poke_params[c],
+                                       jump_poke_bufs[c].buf, op);
+               list_move(&op->list, done_list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
+
+       /*
+        * text_poke_smp doesn't support NMI/MCE code modifying.
+        * However, since kprobes itself also doesn't support NMI/MCE
+        * code probing, it's not a problem.
+        */
+       text_poke_smp_batch(jump_poke_params, c);
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
@@ -1453,11 +1526,35 @@ static int  __kprobes setup_detour_execution(struct kprobe *p,
        }
        return 0;
 }
+
+static int __kprobes init_poke_params(void)
+{
+       /* Allocate code buffer and parameter array */
+       jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+                                MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_bufs)
+               return -ENOMEM;
+
+       jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+                                  MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_params) {
+               kfree(jump_poke_bufs);
+               jump_poke_bufs = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+#else  /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+       return 0;
+}
 #endif
 
 int __init arch_init_kprobes(void)
 {
-       return 0;
+       return init_poke_params();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
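The rework turns per-probe patching into batched patching: up to
MAX_OPTIMIZE_PROBES text_poke parameters are collected, then flushed with a
single text_poke_smp_batch() call, so one stop-machine pass covers many
probes. Condensed, the pattern both new list walkers share:

struct optimized_kprobe *op, *tmp;
int c = 0;

list_for_each_entry_safe(op, tmp, oplist, list) {
        setup_optimize_kprobe(&jump_poke_params[c], jump_poke_bufs[c].buf, op);
        list_del_init(&op->list);
        if (++c >= MAX_OPTIMIZE_PROBES)
                break;          /* leftovers handled on a later pass */
}
text_poke_smp_batch(jump_poke_params, c);       /* one pass, c patches */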
index ce0cb4721c9ac9eec8869e64c1fcd1a1ef0fd379..0fe6d1a66c38cf0aaea3383ac000eefbb5d34fca 100644 (file)
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
        return 0;
 }
 
-static int get_ucode_data(void *to, const u8 *from, size_t n)
-{
-       memcpy(to, from, n);
-       return 0;
-}
-
 static void *
 get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
 {
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
        u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
        void *mc;
 
-       if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
-               return NULL;
+       get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
 
        if (section_hdr[0] != UCODE_UCODE_TYPE) {
                pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
                return NULL;
        }
 
-       mc = vmalloc(UCODE_MAX_SIZE);
-       if (mc) {
-               memset(mc, 0, UCODE_MAX_SIZE);
-               if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
-                                  total_size)) {
-                       vfree(mc);
-                       mc = NULL;
-               } else
-                       *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
-       }
+       mc = vzalloc(UCODE_MAX_SIZE);
+       if (!mc)
+               return NULL;
+
+       get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
+       *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+
        return mc;
 }
 
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
        unsigned int *buf_pos = (unsigned int *)container_hdr;
        unsigned long size;
 
-       if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
-               return 0;
+       get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
 
        size = buf_pos[2];
 
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
        }
 
        buf += UCODE_CONTAINER_HEADER_SIZE;
-       if (get_ucode_data(equiv_cpu_table, buf, size)) {
-               vfree(equiv_cpu_table);
-               return 0;
-       }
+       get_ucode_data(equiv_cpu_table, buf, size);
 
        return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
 }
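The int-returning get_ucode_data() removed above could never fail, so every
error path around it was dead code. The call sites keep the name, which is
consistent with the helper surviving as a trivial copy elsewhere in this
series (presumably in <asm/microcode.h>); a hedged sketch:

/* Assumed replacement; a plain copy with no failure mode. */
static inline void get_ucode_data(void *to, const u8 *from, size_t n)
{
        memcpy(to, from, n);
}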
index dcb65cc0a05368ca096e4d907bffe63f9525fb93..1a1b606d3e92a879183cd720e3072b4b0832e579 100644 (file)
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
                /* For performance reasons, reuse mc area when possible */
                if (!mc || mc_size > curr_mc_size) {
-                       if (mc)
-                               vfree(mc);
+                       vfree(mc);
                        mc = vmalloc(mc_size);
                        if (!mc)
                                break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
                if (get_ucode_data(mc, ucode_ptr, mc_size) ||
                    microcode_sanity_check(mc) < 0) {
-                       vfree(mc);
                        break;
                }
 
                if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
-                       if (new_mc)
-                               vfree(new_mc);
+                       vfree(new_mc);
                        new_rev = mc_header.rev;
                        new_mc  = mc;
                        mc = NULL;      /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
                leftover  -= mc_size;
        }
 
-       if (mc)
-               vfree(mc);
+       vfree(mc);
 
        if (leftover) {
-               if (new_mc)
-                       vfree(new_mc);
+               vfree(new_mc);
                state = UCODE_ERROR;
                goto out;
        }
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
                goto out;
        }
 
-       if (uci->mc)
-               vfree(uci->mc);
+       vfree(uci->mc);
        uci->mc = (struct microcode_intel *)new_mc;
 
        pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
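These cleanups lean on the fact that vfree(), like kfree(), is defined to be
a no-op when passed NULL, so the guarded and bare forms are equivalent:

if (new_mc)
        vfree(new_mc);  /* old style */
vfree(new_mc);          /* new style, same behavior */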
index ba0f0ca9f280bb0473470fdb71a3fa04faec00f0..c01ffa5b9b87e509da797e359577bbb84463ce5f 100644 (file)
@@ -143,7 +143,7 @@ static void flush_gart(void)
 
        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        if (need_flush) {
-               k8_flush_garts();
+               amd_flush_garts();
                need_flush = false;
        }
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
 {
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
 
                enable_gart_translation(dev, __pa(agp_gatt_table));
        }
 
        /* Flush the GART-TLB to remove stale entries */
-       k8_flush_garts();
+       amd_flush_garts();
 }
 
 /*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
        if (!fix_up_north_bridges)
                return;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
        pr_info("PCI-DMA: Restoring GART aperture settings\n");
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
 
                /*
                 * Don't enable translations just yet.  That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
  */
-static __init int init_k8_gatt(struct agp_kern_info *info)
+static __init int init_amd_gatt(struct agp_kern_info *info)
 {
        unsigned aper_size, gatt_size, new_aper_size;
        unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
        aper_size = aper_base = info->aper_size = 0;
        dev = NULL;
-       for (i = 0; i < k8_northbridges.num; i++) {
-               dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               dev = node_to_amd_nb(i)->misc;
                new_aper_base = read_aperture(dev, &new_aper_size);
                if (!new_aper_base)
                        goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
        if (!no_agp)
                return;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                u32 ctl;
 
-               dev = k8_northbridges.nb_misc[i];
+               dev = node_to_amd_nb(i)->misc;
                pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
 
                ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
        unsigned long scratch;
        long i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return 0;
 
 #ifndef CONFIG_AGP_AMD64
        no_agp = 1;
 #else
        /* Makefile puts PCI initialization via subsys_initcall first. */
-       /* Add other K8 AGP bridge drivers here */
+       /* Add other AMD AGP bridge drivers here */
        no_agp = no_agp ||
                (agp_amd64_init() < 0) ||
                (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
        if (no_iommu ||
            (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
            !gart_iommu_aperture ||
-           (no_agp && init_k8_gatt(&info) < 0)) {
+           (no_agp && init_amd_gatt(&info) < 0)) {
                if (max_pfn > MAX_DMA32_PFN) {
                        pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
                        pr_warning("falling back to iommu=soft.\n");
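The renaming replaces the old k8_northbridges global with accessor helpers;
every loop in the hunks above now reaches a northbridge's misc device the
same way (helpers from <asm/amd_nb.h>):

int i;

if (amd_nb_has_feature(AMD_NB_GART)) {
        for (i = 0; i < amd_nb_num(); i++) {
                struct pci_dev *misc = node_to_amd_nb(i)->misc;
                /* ... program the GART through 'misc' ... */
        }
}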
index 57d1868a86aadc060bc2260b34139809a98ffab5..c852041bfc3d5b70e792dad0cfd42fdd467c3f38 100644 (file)
@@ -91,8 +91,7 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
        show_registers(regs);
-       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-                  regs->bp);
+       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
@@ -374,6 +373,7 @@ void default_idle(void)
 {
        if (hlt_use_halt()) {
                trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle(1, smp_processor_id());
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
@@ -444,6 +444,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
        trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
+       trace_cpu_idle((ax>>4)+1, smp_processor_id());
        if (!need_resched()) {
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
@@ -460,6 +461,7 @@ static void mwait_idle(void)
 {
        if (!need_resched()) {
                trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle(1, smp_processor_id());
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
@@ -481,10 +483,12 @@ static void mwait_idle(void)
 static void poll_idle(void)
 {
        trace_power_start(POWER_CSTATE, 0, smp_processor_id());
+       trace_cpu_idle(0, smp_processor_id());
        local_irq_enable();
        while (!need_resched())
                cpu_relax();
-       trace_power_end(0);
+       trace_power_end(smp_processor_id());
+       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /*
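The trace_cpu_idle events are emitted in enter/exit pairs: entry carries the
target C-state, exit uses the PWR_EVENT_EXIT sentinel. The shape every idle
routine in these hunks now follows:

trace_power_start(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle(1, smp_processor_id());                  /* entering C1 */
/* ... hlt/mwait/poll until need_resched() ... */
trace_power_end(smp_processor_id());
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());     /* leaving idle */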
index 96586c3cbbbf88dd6479ed250b24ea1112154a22..4b9befa0e347f6f402238d28fea4f9969563a6dd 100644 (file)
@@ -113,8 +113,8 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
-
                        trace_power_end(smp_processor_id());
+                       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
index b3d7a3a04f389d9626837a1e776c5106e821f7dc..4c818a73839685c3b1083d2170f02e6758a3bed7 100644 (file)
@@ -142,6 +142,8 @@ void cpu_idle(void)
                        start_critical_timings();
 
                        trace_power_end(smp_processor_id());
+                       trace_cpu_idle(PWR_EVENT_EXIT,
+                                      smp_processor_id());
 
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
index fda313ebbb03dfc98d5d0f953a1b99846e25dc9a..c8e41e90f59ceb9da7be768fcfe197a83602742e 100644 (file)
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
        outb(1, 0x92);
 }
 
+static void ce4100_reset(struct pci_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < 10; i++) {
+               outb(0x2, 0xcf9);
+               udelay(50);
+       }
+}
+
 struct device_fixup {
        unsigned int vendor;
        unsigned int device;
        void (*reboot_fixup)(struct pci_dev *);
 };
 
+/*
+ * PCI IDs used solely by fixups_table go here
+ */
+#define PCI_DEVICE_ID_INTEL_CE4100     0x0708
+
 static const struct device_fixup fixups_table[] = {
 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
 { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
 { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
+{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
 };
 
 /*
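fixups_table is consumed by a linear scan that matches present PCI devices
against each entry; a hedged sketch of that walk (the real loop lives later
in this file and may differ in detail):

const struct device_fixup *fixup;
struct pci_dev *dev;
int i;

for (i = 0; i < ARRAY_SIZE(fixups_table); i++) {
        fixup = &fixups_table[i];
        dev = pci_get_device(fixup->vendor, fixup->device, NULL);
        if (!dev)
                continue;
        fixup->reboot_fixup(dev);       /* e.g. ce4100_reset() */
}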
index 85268f8eadf667c6034c06882ed661cb7d03e4a8..d3cfe26c0252ab24aaae1481c8c297ac096e6c6d 100644 (file)
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
        return total << PAGE_SHIFT;
 }
 
-#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF
+/*
+ * Keep the crash kernel below this limit.  On 32-bit, earlier kernels
+ * would limit the crash kernel to the low 512 MiB due to mapping
+ * restrictions.  On 64-bit, kexec-tools currently limits us to 896 MiB;
+ * raise this limit once kexec-tools is fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+#endif
+
 static void __init reserve_crashkernel(void)
 {
        unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
                const unsigned long long alignment = 16<<20;    /* 16M */
 
                /*
-                *  kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX
+                *  kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX
                 */
                crash_base = memblock_find_in_range(alignment,
-                              DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment);
+                              CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
 
                if (crash_base == MEMBLOCK_ERROR) {
                        pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -694,7 +705,7 @@ static u64 __init get_max_mapped(void)
 void __init setup_arch(char **cmdline_p)
 {
        int acpi = 0;
-       int k8 = 0;
+       int amd = 0;
        unsigned long flags;
 
 #ifdef CONFIG_X86_32
@@ -980,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
        acpi = acpi_numa_init();
 #endif
 
-#ifdef CONFIG_K8_NUMA
+#ifdef CONFIG_AMD_NUMA
        if (!acpi)
-               k8 = !k8_numa_init(0, max_pfn);
+               amd = !amd_numa_init(0, max_pfn);
 #endif
 
-       initmem_init(0, max_pfn, acpi, k8);
+       initmem_init(0, max_pfn, acpi, amd);
        memblock_find_dma_reserve();
        dma32_reserve_bootmem();
 
@@ -1034,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        init_apic_mappings();
-       ioapic_init_mappings();
-
-       /* need to wait for io_apic is mapped */
-       probe_nr_irqs_gsi();
+       ioapic_and_gsi_init();
 
        kvm_guest_init();
 
index 083e99d1b7df2aba236563467f47ebb21a09943d..ee886fe10ef4eb8515ae1d2431c0ce8ad3e8dd89 100644 (file)
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
         */
        smp_store_cpu_info(cpuid);
 
+       /*
+        * This must be done before setting cpu_online_mask
+        * or calling notify_cpu_starting.
+        */
+       set_cpu_sibling_map(raw_smp_processor_id());
+       wmb();
+
        notify_cpu_starting(cpuid);
 
        /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
         */
        check_tsc_sync_target();
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               legacy_pic->mask(0);
-               enable_NMI_through_LVT0();
-               legacy_pic->unmask(0);
-       }
-
-       /* This must be done before setting cpu_online_mask */
-       set_cpu_sibling_map(raw_smp_processor_id());
-       wmb();
-
        /*
         * We need to hold call_lock, so there is no inconsistency
         * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
                printk(KERN_INFO "SMP mode deactivated.\n");
                smpboot_clear_io_apic();
 
-               localise_nmi_watchdog();
-
                connect_bsp_APIC();
                setup_local_APIC();
                end_local_APIC_setup();
@@ -1166,6 +1161,20 @@ out:
        preempt_enable();
 }
 
+void arch_disable_nonboot_cpus_begin(void)
+{
+       /*
+        * Avoid the smp alternatives switch during disable_nonboot_cpus().
+        * In the suspend path, we will be back in SMP mode shortly anyway.
+        */
+       skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+       skip_smp_alternatives = false;
+}
+
 void arch_enable_nonboot_cpus_begin(void)
 {
        set_mtrr_aps_delayed_init();
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
-       check_nmi_watchdog();
        mtrr_aps_init();
 }
 
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void)
        if (cpu == 0)
                return -EBUSY;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               stop_apic_nmi_watchdog(NULL);
        clear_local_APIC();
 
        cpu_disable_common();
index b53c525368a75cf07489b0327de138bfab5b16d5..938c8e10a19abeae0e2c814e182a0920ff0328f9 100644 (file)
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-       dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+       dump_trace(current, NULL, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
 {
-       dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+       dump_trace(current, regs, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-       dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+       dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
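With bp gone from this API as well, callers either capture the current
task's trace or pass a pt_regs they already hold. Typical usage after the
change (the entries/trace names are illustrative):

unsigned long entries[32];
struct stack_trace trace = {
        .max_entries = ARRAY_SIZE(entries),
        .entries     = entries,
};

save_stack_trace(&trace);               /* current task, no regs needed */
/* or, from an exception/NMI path with regs in hand: */
save_stack_trace_regs(&trace, regs);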
index fb5cc5e14cfafb6ed62c4de2929b3bfa0a2e91e9..25a28a245937989d2de9abfb94d96835a1b01615 100644 (file)
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
 #ifdef CONFIG_X86_64
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
        /* Keep nmi watchdog up to date */
        inc_irq_stat(irq0_irqs);
 
-       /* Optimized out for !IO_APIC and x86_64 */
-       if (timer_ack) {
-               /*
-                * Subtle: when I/O APICs are used, we have to ack the timer
-                * IRQ manually to deassert the NMI lines for the watchdog
-                * when running on an 82489DX-based system.
-                */
-               raw_spin_lock(&i8259A_lock);
-               outb(0x0c, PIC_MASTER_OCW3);
-               /* Ack the IRQ; AEOI will end it automatically. */
-               inb(PIC_MASTER_POLL);
-               raw_spin_unlock(&i8259A_lock);
-       }
-
        global_clock_event->event_handler(global_clock_event);
 
        /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
index 3af2dff58b213262d403a8f9c1c55f84920b1d8d..075d130efcf9019eb3f4745677e281533335fa11 100644 (file)
@@ -127,7 +127,7 @@ startup_64:
 no_longmode:
        hlt
        jmp no_longmode
-#include "verify_cpu_64.S"
+#include "verify_cpu.S"
 
        # Careful these need to be in the same 64K segment as the above;
 tidt:
index cb838ca42c9664c2ecf9530d170d27cdbc679f7a..c76aaca5694dd88c6fee8af964e8c62ccd3ad272 100644 (file)
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_panic;
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
        if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
        die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+       unknown_nmi_panic = 1;
+       return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -342,9 +351,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
        reason = (reason & 0xf) | 8;
        outb(reason, 0x61);
 
-       i = 2000;
-       while (--i)
-               udelay(1000);
+       i = 20000;
+       while (--i) {
+               touch_nmi_watchdog();
+               udelay(100);
+       }
 
        reason &= ~8;
        outb(reason, 0x61);
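The total delay budget is unchanged (2000 * 1000 us = 20000 * 100 us = 2 s);
the finer 100 us step simply leaves room to call touch_nmi_watchdog()
between iterations so the wait itself cannot trip the watchdog.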
@@ -371,7 +382,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
                        reason, smp_processor_id());
 
        printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-       if (panic_on_unrecovered_nmi)
+       if (unknown_nmi_panic || panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");
 
        printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +408,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
                if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
                                                        == NOTIFY_STOP)
                        return;
-
-#ifndef CONFIG_LOCKUP_DETECTOR
-               /*
-                * Ok, so this is none of the documented NMI sources,
-                * so it must be the NMI watchdog.
-                */
-               if (nmi_watchdog_tick(regs, reason))
-                       return;
-               if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-                       unknown_nmi_error(reason, regs);
-#else
-               unknown_nmi_error(reason, regs);
 #endif
+               unknown_nmi_error(reason, regs);
 
                return;
        }
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 void stop_nmi(void)
 {
-       acpi_nmi_disable();
        ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
        ignore_nmis--;
-       acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
index 0c40d8b72416ba2ef7e86bfd812b7bf6f1db2f8f..356a0d455cf997cb1bd586d3a13fd8a7c16d4d3c 100644 (file)
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
 
        if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                return 0;
+
+       if (tsc_clocksource_reliable)
+               return 0;
        /*
         * Intel systems are normally all synchronized.
         * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
                /* assume multi socket systems are not synchronized: */
                if (num_possible_cpus() > 1)
-                       tsc_unstable = 1;
+                       return 1;
        }
 
-       return tsc_unstable;
+       return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work: ignored.
+ *
+ * This function uses delayed work over a period of a second to
+ * further refine the TSC frequency value. Since this is timer based,
+ * instead of loop based, we don't block the boot process while this
+ * longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc), or the
+ * refined calibration is off by more than 1% from the fast early
+ * calibration, we throw out the new calibration and use the early
+ * calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+       static u64 tsc_start = -1, ref_start;
+       static int hpet;
+       u64 tsc_stop, ref_stop, delta;
+       unsigned long freq;
+
+       /* Don't bother refining TSC on unstable systems */
+       if (check_tsc_unstable())
+               goto out;
+
+       /*
+        * Since the work is started early in boot, the first expiry
+        * may be delayed. So arm the workqueue again once we know
+        * timers are working.
+        */
+       if (tsc_start == -1) {
+               /*
+                * Only set hpet once, to avoid mixing hardware
+                * if the hpet becomes enabled later.
+                */
+               hpet = is_hpet_enabled();
+               schedule_delayed_work(&tsc_irqwork, HZ);
+               tsc_start = tsc_read_refs(&ref_start, hpet);
+               return;
+       }
+
+       tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+       /* hpet or pmtimer available? */
+       if (!hpet && !ref_start && !ref_stop)
+               goto out;
+
+       /* Check whether the sampling was disturbed by an SMI */
+       if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+               goto out;
+
+       delta = tsc_stop - tsc_start;
+       delta *= 1000000LL;
+       if (hpet)
+               freq = calc_hpet_ref(delta, ref_start, ref_stop);
+       else
+               freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+       /* Make sure we're within 1% */
+       if (abs(tsc_khz - freq) > tsc_khz/100)
+               goto out;
+
+       tsc_khz = freq;
+       printk(KERN_INFO "Refined TSC clocksource calibration: "
+               "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+                                       (unsigned long)tsc_khz % 1000);
+
+out:
+       clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
 {
+       if (!cpu_has_tsc || tsc_disabled > 0)
+               return 0;
+
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
        /* lower the rating if we already know it's unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
                clocksource_tsc.rating = 0;
                clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
        }
-       clocksource_register_khz(&clocksource_tsc, tsc_khz);
+       schedule_delayed_work(&tsc_irqwork, 0);
+       return 0;
 }
+/*
+ * We use device_initcall here to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
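
The ordering the comment relies on is fixed by initcall levels: fs_initcall() callbacks run before device_initcall() callbacks. A minimal sketch of that ordering, assuming a kernel build context (names are illustrative):

    #include <linux/init.h>
    #include <linux/printk.h>

    static int __init runs_first(void)
    {
        pr_info("fs_initcall level: hpet can register here\n");
        return 0;
    }
    fs_initcall(runs_first);

    static int __init runs_second(void)
    {
        pr_info("device_initcall level: hpet is visible by now\n");
        return 0;
    }
    device_initcall(runs_second);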
 
 void __init tsc_init(void)
 {
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
                mark_tsc_unstable("TSCs unsynchronized");
 
        check_system_tsc_reliable();
-       init_tsc_clocksource();
 }
 
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
new file mode 100644 (file)
index 0000000..0edefc1
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ *
+ *     verify_cpu.S - Code for cpu long mode and SSE verification. This
+ *     code has been borrowed from boot/setup.S and was introduced by
+ *     Andi Kleen.
+ *
+ *     Copyright (c) 2007  Andi Kleen (ak@suse.de)
+ *     Copyright (c) 2007  Eric Biederman (ebiederm@xmission.com)
+ *     Copyright (c) 2007  Vivek Goyal (vgoyal@in.ibm.com)
+ *     Copyright (c) 2010  Kees Cook (kees.cook@canonical.com)
+ *
+ *     This source code is licensed under the GNU General Public License,
+ *     Version 2.  See the file COPYING for more details.
+ *
+ *     This is common code for verifying whether a CPU supports
+ *     long mode and SSE. It is not called directly; instead, this
+ *     file is included in various places and compiled in that context.
+ *     This file is expected to run in 32bit code.  Currently:
+ *
+ *     arch/x86/boot/compressed/head_64.S: Boot cpu verification
+ *     arch/x86/kernel/trampoline_64.S: secondary processor verification
+ *     arch/x86/kernel/head_32.S: processor startup
+ *
+ *     verify_cpu returns the status of long mode and SSE in register %eax.
+ *             0: Success    1: Failure
+ *
+ *     On Intel, the XD_DISABLE flag will be cleared as a side-effect.
+ *
+ *     The caller needs to check the error code and take appropriate
+ *     action. Either display a message or halt.
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/msr-index.h>
+
+verify_cpu:
+       pushfl                          # Save caller passed flags
+       pushl   $0                      # Kill any dangerous flags
+       popfl
+
+       pushfl                          # standard way to check for cpuid
+       popl    %eax
+       movl    %eax,%ebx
+       xorl    $0x200000,%eax
+       pushl   %eax
+       popfl
+       pushfl
+       popl    %eax
+       cmpl    %eax,%ebx
+       jz      verify_cpu_no_longmode  # cpu has no cpuid
+
+       movl    $0x0,%eax               # See if cpuid 1 is implemented
+       cpuid
+       cmpl    $0x1,%eax
+       jb      verify_cpu_no_longmode  # no cpuid 1
+
+       xor     %di,%di
+       cmpl    $0x68747541,%ebx        # AuthenticAMD
+       jnz     verify_cpu_noamd
+       cmpl    $0x69746e65,%edx
+       jnz     verify_cpu_noamd
+       cmpl    $0x444d4163,%ecx
+       jnz     verify_cpu_noamd
+       mov     $1,%di                  # cpu is from AMD
+       jmp     verify_cpu_check
+
+verify_cpu_noamd:
+       cmpl    $0x756e6547,%ebx        # GenuineIntel?
+       jnz     verify_cpu_check
+       cmpl    $0x49656e69,%edx
+       jnz     verify_cpu_check
+       cmpl    $0x6c65746e,%ecx
+       jnz     verify_cpu_check
+
+       # only call IA32_MISC_ENABLE when:
+       # family > 6 || (family == 6 && model >= 0xd)
+       movl    $0x1, %eax              # check CPU family and model
+       cpuid
+       movl    %eax, %ecx
+
+       andl    $0x0ff00f00, %eax       # mask family and extended family
+       shrl    $8, %eax
+       cmpl    $6, %eax
+       ja      verify_cpu_clear_xd     # family > 6, ok
+       jb      verify_cpu_check        # family < 6, skip
+
+       andl    $0x000f00f0, %ecx       # mask model and extended model
+       shrl    $4, %ecx
+       cmpl    $0xd, %ecx
+       jb      verify_cpu_check        # family == 6, model < 0xd, skip
+
+verify_cpu_clear_xd:
+       movl    $MSR_IA32_MISC_ENABLE, %ecx
+       rdmsr
+       btrl    $2, %edx                # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
+       jnc     verify_cpu_check        # only write MSR if bit was changed
+       wrmsr
+
+verify_cpu_check:
+       movl    $0x1,%eax               # Does the cpu have what it takes
+       cpuid
+       andl    $REQUIRED_MASK0,%edx
+       xorl    $REQUIRED_MASK0,%edx
+       jnz     verify_cpu_no_longmode
+
+       movl    $0x80000000,%eax        # See if extended cpuid is implemented
+       cpuid
+       cmpl    $0x80000001,%eax
+       jb      verify_cpu_no_longmode  # no extended cpuid
+
+       movl    $0x80000001,%eax        # Does the cpu have what it takes
+       cpuid
+       andl    $REQUIRED_MASK1,%edx
+       xorl    $REQUIRED_MASK1,%edx
+       jnz     verify_cpu_no_longmode
+
+verify_cpu_sse_test:
+       movl    $1,%eax
+       cpuid
+       andl    $SSE_MASK,%edx
+       cmpl    $SSE_MASK,%edx
+       je      verify_cpu_sse_ok
+       test    %di,%di
+       jz      verify_cpu_no_longmode  # only try to force SSE on AMD
+       movl    $MSR_K7_HWCR,%ecx
+       rdmsr
+       btr     $15,%eax                # enable SSE
+       wrmsr
+       xor     %di,%di                 # don't loop
+       jmp     verify_cpu_sse_test     # try again
+
+verify_cpu_no_longmode:
+       popfl                           # Restore caller passed flags
+       movl $1,%eax
+       ret
+verify_cpu_sse_ok:
+       popfl                           # Restore caller passed flags
+       xorl %eax, %eax
+       ret
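
The vendor constants compared above are the CPUID leaf 0 vendor string in little-endian ASCII: 0x68747541/0x69746e65/0x444d4163 spell "AuthenticAMD" and 0x756e6547/0x49656e69/0x6c65746e spell "GenuineIntel". A minimal user-space sketch of the same check using GCC's <cpuid.h>:

    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        char vendor[13];

        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
            return 1;	/* no CPUID at all */
        /* leaf 0 returns the vendor string in EBX:EDX:ECX */
        memcpy(vendor + 0, &ebx, 4);	/* e.g. 0x68747541 == "Auth" */
        memcpy(vendor + 4, &edx, 4);	/*      0x69746e65 == "enti" */
        memcpy(vendor + 8, &ecx, 4);	/*      0x444d4163 == "cAMD" */
        vendor[12] = '\0';
        printf("%s\n", vendor);
        return 0;
    }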
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S
deleted file mode 100644 (file)
index 56a8c2a..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- *
- *     verify_cpu.S - Code for cpu long mode and SSE verification. This
- *     code has been borrowed from boot/setup.S and was introduced by
- *     Andi Kleen.
- *
- *     Copyright (c) 2007  Andi Kleen (ak@suse.de)
- *     Copyright (c) 2007  Eric Biederman (ebiederm@xmission.com)
- *     Copyright (c) 2007  Vivek Goyal (vgoyal@in.ibm.com)
- *
- *     This source code is licensed under the GNU General Public License,
- *     Version 2.  See the file COPYING for more details.
- *
- *     This is a common code for verification whether CPU supports
- *     long mode and SSE or not. It is not called directly instead this
- *     file is included at various places and compiled in that context.
- *     Following are the current usage.
- *
- *     This file is included by both 16bit and 32bit code.
- *
- *     arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
- *     arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- *     arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
- *     arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
- *
- *     verify_cpu, returns the status of cpu check in register %eax.
- *             0: Success    1: Failure
- *
- *     The caller needs to check for the error code and take the action
- *     appropriately. Either display a message or halt.
- */
-
-#include <asm/cpufeature.h>
-#include <asm/msr-index.h>
-
-verify_cpu:
-       pushfl                          # Save caller passed flags
-       pushl   $0                      # Kill any dangerous flags
-       popfl
-
-       pushfl                          # standard way to check for cpuid
-       popl    %eax
-       movl    %eax,%ebx
-       xorl    $0x200000,%eax
-       pushl   %eax
-       popfl
-       pushfl
-       popl    %eax
-       cmpl    %eax,%ebx
-       jz      verify_cpu_no_longmode  # cpu has no cpuid
-
-       movl    $0x0,%eax               # See if cpuid 1 is implemented
-       cpuid
-       cmpl    $0x1,%eax
-       jb      verify_cpu_no_longmode  # no cpuid 1
-
-       xor     %di,%di
-       cmpl    $0x68747541,%ebx        # AuthenticAMD
-       jnz     verify_cpu_noamd
-       cmpl    $0x69746e65,%edx
-       jnz     verify_cpu_noamd
-       cmpl    $0x444d4163,%ecx
-       jnz     verify_cpu_noamd
-       mov     $1,%di                  # cpu is from AMD
-
-verify_cpu_noamd:
-       movl    $0x1,%eax               # Does the cpu have what it takes
-       cpuid
-       andl    $REQUIRED_MASK0,%edx
-       xorl    $REQUIRED_MASK0,%edx
-       jnz     verify_cpu_no_longmode
-
-       movl    $0x80000000,%eax        # See if extended cpuid is implemented
-       cpuid
-       cmpl    $0x80000001,%eax
-       jb      verify_cpu_no_longmode  # no extended cpuid
-
-       movl    $0x80000001,%eax        # Does the cpu have what it takes
-       cpuid
-       andl    $REQUIRED_MASK1,%edx
-       xorl    $REQUIRED_MASK1,%edx
-       jnz     verify_cpu_no_longmode
-
-verify_cpu_sse_test:
-       movl    $1,%eax
-       cpuid
-       andl    $SSE_MASK,%edx
-       cmpl    $SSE_MASK,%edx
-       je      verify_cpu_sse_ok
-       test    %di,%di
-       jz      verify_cpu_no_longmode  # only try to force SSE on AMD
-       movl    $MSR_K7_HWCR,%ecx
-       rdmsr
-       btr     $15,%eax                # enable SSE
-       wrmsr
-       xor     %di,%di                 # don't loop
-       jmp     verify_cpu_sse_test     # try again
-
-verify_cpu_no_longmode:
-       popfl                           # Restore caller passed flags
-       movl $1,%eax
-       ret
-verify_cpu_sse_ok:
-       popfl                           # Restore caller passed flags
-       xorl %eax, %eax
-       ret
index e03530aebfd0332635f901afa9f19f00317fa097..bf4700755184e32d4b4e549bd19f4014caa46468 100644 (file)
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
 
 PHDRS {
        text PT_LOAD FLAGS(5);          /* R_E */
-       data PT_LOAD FLAGS(7);          /* RWE */
+       data PT_LOAD FLAGS(6);          /* RW_ */
 #ifdef CONFIG_X86_64
        user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
 
        EXCEPTION_TABLE(16) :text = 0x9090
 
+#if defined(CONFIG_DEBUG_RODATA)
+       /* .text should occupy a whole number of pages */
+       . = ALIGN(PAGE_SIZE);
+#endif
        X64_ALIGN_DEBUG_RODATA_BEGIN
        RO_DATA(PAGE_SIZE)
        X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
                __bss_start = .;
                *(.bss..page_aligned)
                *(.bss)
-               . = ALIGN(4);
+               . = ALIGN(PAGE_SIZE);
                __bss_stop = .;
        }
 
index f628234fbeca06683876543f07476f10111812b7..3cece05e4ac4622853c8eeb20e3bd04a02f851c7 100644 (file)
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
        s->pics[1].elcr_mask = 0xde;
        s->pics[0].pics_state = s;
        s->pics[1].pics_state = s;
+       s->pics[0].isr_ack = 0xff;
+       s->pics[1].isr_ack = 0xff;
 
        /*
         * Initialize PIO device
index fb8b376bf28cb3e04a6bb903900f32838ab02a14..fbb04aee8301efab741f80f7c590a45d7e3b298f 100644 (file)
@@ -2394,7 +2394,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        ASSERT(!VALID_PAGE(root));
                        spin_lock(&vcpu->kvm->mmu_lock);
                        kvm_mmu_free_some_pages(vcpu);
-                       sp = kvm_mmu_get_page(vcpu, i << 30, i << 30,
+                       sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+                                             i << 30,
                                              PT32_ROOT_LEVEL, 1, ACC_ALL,
                                              NULL);
                        root = __pa(sp->spt);
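
The fix above passes a guest frame number, not a byte address, as the gfn argument: a 1 GiB slot offset of i << 30 bytes corresponds to frame i << (30 - PAGE_SHIFT). A worked sketch of the conversion (standalone, illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        int i;

        for (i = 0; i < 4; i++) {
            uint64_t gpa = (uint64_t)i << 30;                 /* byte address */
            uint64_t gfn = (uint64_t)i << (30 - PAGE_SHIFT);  /* frame number */
            printf("i=%d gpa=%#llx gfn=%#llx\n", i,
                   (unsigned long long)gpa, (unsigned long long)gfn);
        }
        return 0;
    }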
index 55543397a8a795f295ef6e3731e6b9373c953958..09df2f9a3d69ce36a20ec86c82bc9b719d44a1ae 100644 (file)
@@ -23,7 +23,7 @@ mmiotrace-y                   := kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)   += testmmiotrace.o
 
 obj-$(CONFIG_NUMA)             += numa.o numa_$(BITS).o
-obj-$(CONFIG_K8_NUMA)          += k8topology_64.o
+obj-$(CONFIG_AMD_NUMA)         += amdtopology_64.o
 obj-$(CONFIG_ACPI_NUMA)                += srat_$(BITS).o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)            += memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
new file mode 100644 (file)
index 0000000..51fae9c
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * AMD NUMA support.
+ * Discover the memory map and associated nodes.
+ *
+ * This version reads it directly from the AMD northbridge.
+ *
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/memblock.h>
+
+#include <asm/io.h>
+#include <linux/pci_ids.h>
+#include <linux/acpi.h>
+#include <asm/types.h>
+#include <asm/mmzone.h>
+#include <asm/proto.h>
+#include <asm/e820.h>
+#include <asm/pci-direct.h>
+#include <asm/numa.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/amd_nb.h>
+
+static struct bootnode __initdata nodes[8];
+static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
+
+static __init int find_northbridge(void)
+{
+       int num;
+
+       for (num = 0; num < 32; num++) {
+               u32 header;
+
+               header = read_pci_config(0, num, 0, 0x00);
+               if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
+                       header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
+                       header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
+                       continue;
+
+               header = read_pci_config(0, num, 1, 0x00);
+               if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
+                       header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
+                       header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
+                       continue;
+               return num;
+       }
+
+       return -1;
+}
+
+static __init void early_get_boot_cpu_id(void)
+{
+       /*
+        * need to get the APIC ID of the BSP so we can use it to
+        * create apicid_to_node in amd_scan_nodes()
+        */
+#ifdef CONFIG_X86_MPPARSE
+       /*
+        * get boot-time SMP configuration:
+        */
+       if (smp_found_config)
+               early_get_smp_config();
+#endif
+       early_init_lapic_mapping();
+}
+
+int __init amd_get_nodes(struct bootnode *physnodes)
+{
+       int i;
+       int ret = 0;
+
+       for_each_node_mask(i, nodes_parsed) {
+               physnodes[ret].start = nodes[i].start;
+               physnodes[ret].end = nodes[i].end;
+               ret++;
+       }
+       return ret;
+}
+
+int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+       unsigned long start = PFN_PHYS(start_pfn);
+       unsigned long end = PFN_PHYS(end_pfn);
+       unsigned numnodes;
+       unsigned long prevbase;
+       int i, nb, found = 0;
+       u32 nodeid, reg;
+
+       if (!early_pci_allowed())
+               return -1;
+
+       nb = find_northbridge();
+       if (nb < 0)
+               return nb;
+
+       pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
+
+       reg = read_pci_config(0, nb, 0, 0x60);
+       numnodes = ((reg >> 4) & 0xF) + 1;
+       if (numnodes <= 1)
+               return -1;
+
+       pr_info("Number of physical nodes %d\n", numnodes);
+
+       prevbase = 0;
+       for (i = 0; i < 8; i++) {
+               unsigned long base, limit;
+
+               base = read_pci_config(0, nb, 1, 0x40 + i*8);
+               limit = read_pci_config(0, nb, 1, 0x44 + i*8);
+
+               nodeid = limit & 7;
+               if ((base & 3) == 0) {
+                       if (i < numnodes)
+                               pr_info("Skipping disabled node %d\n", i);
+                       continue;
+               }
+               if (nodeid >= numnodes) {
+                       pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+                               base, limit);
+                       continue;
+               }
+
+               if (!limit) {
+                       pr_info("Skipping node entry %d (base %lx)\n",
+                               i, base);
+                       continue;
+               }
+               if ((base >> 8) & 3 || (limit >> 8) & 3) {
+                       pr_err("Node %d using interleaving mode %lx/%lx\n",
+                              nodeid, (base >> 8) & 3, (limit >> 8) & 3);
+                       return -1;
+               }
+               if (node_isset(nodeid, nodes_parsed)) {
+                       pr_info("Node %d already present, skipping\n",
+                               nodeid);
+                       continue;
+               }
+
+               limit >>= 16;
+               limit <<= 24;
+               limit |= (1<<24)-1;
+               limit++;
+
+               if (limit > end)
+                       limit = end;
+               if (limit <= base)
+                       continue;
+
+               base >>= 16;
+               base <<= 24;
+
+               if (base < start)
+                       base = start;
+               if (limit > end)
+                       limit = end;
+               if (limit == base) {
+                       pr_err("Empty node %d\n", nodeid);
+                       continue;
+               }
+               if (limit < base) {
+                       pr_err("Node %d bogus settings %lx-%lx.\n",
+                              nodeid, base, limit);
+                       continue;
+               }
+
+               /* Could sort here, but punt for now. Should not happen anyway. */
+               if (prevbase > base) {
+                       pr_err("Node map not sorted %lx,%lx\n",
+                              prevbase, base);
+                       return -1;
+               }
+
+               pr_info("Node %d MemBase %016lx Limit %016lx\n",
+                       nodeid, base, limit);
+
+               found++;
+
+               nodes[nodeid].start = base;
+               nodes[nodeid].end = limit;
+
+               prevbase = base;
+
+               node_set(nodeid, nodes_parsed);
+       }
+
+       if (!found)
+               return -1;
+       return 0;
+}
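
The shifts above follow the AMD northbridge DRAM address map layout: bits 31..16 of the base/limit registers hold physical address bits 39..24, and the limit is inclusive. A minimal sketch of the decode (helper names are illustrative):

    #include <stdint.h>

    /* Register bits 31..16 carry physical address bits 39..24. */
    static uint64_t dram_base(uint32_t reg)
    {
        return ((uint64_t)reg >> 16) << 24;
    }

    /* The limit is inclusive: fill the low 24 bits, then add one to
     * obtain an exclusive end address, as amd_numa_init() does. */
    static uint64_t dram_end(uint32_t reg)
    {
        return ((((uint64_t)reg >> 16) << 24) | ((1 << 24) - 1)) + 1;
    }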
+
+int __init amd_scan_nodes(void)
+{
+       unsigned int bits;
+       unsigned int cores;
+       unsigned int apicid_base;
+       int i;
+
+       BUG_ON(nodes_empty(nodes_parsed));
+       node_possible_map = nodes_parsed;
+       memnode_shift = compute_hash_shift(nodes, 8, NULL);
+       if (memnode_shift < 0) {
+               pr_err("No NUMA node hash function found. Contact maintainer\n");
+               return -1;
+       }
+       pr_info("Using node hash shift of %d\n", memnode_shift);
+
+       /* use the coreid bits from early_identify_cpu */
+       bits = boot_cpu_data.x86_coreid_bits;
+       cores = (1<<bits);
+       apicid_base = 0;
+       /* get the APIC ID of the BSP early for systems with apicid lifting */
+       early_get_boot_cpu_id();
+       if (boot_cpu_physical_apicid > 0) {
+               pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
+               apicid_base = boot_cpu_physical_apicid;
+       }
+
+       for_each_node_mask(i, node_possible_map) {
+               int j;
+
+               memblock_x86_register_active_regions(i,
+                               nodes[i].start >> PAGE_SHIFT,
+                               nodes[i].end >> PAGE_SHIFT);
+               for (j = apicid_base; j < cores + apicid_base; j++)
+                       apicid_to_node[(i << bits) + j] = i;
+               setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+       }
+
+       numa_init_array();
+       return 0;
+}
index c0e28a13de7df55c1ee1b173b61fd2d18c50e49c..947f42abe820eed9e47388ff3fcfd6fc937bb96a 100644 (file)
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
        /*
         * We just marked the kernel text read only above, now that
         * we are going to free part of that, we need to make that
-        * writeable first.
+        * writeable and non-executable first.
         */
+       set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
        set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
index 0e969f9f401b72bddf403325e2a3a862199ee07b..f89b5bb4e93f82926f339054aa37f5bd4e829216 100644 (file)
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-       if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+       if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
 }
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }
 
+static void mark_nxdata_nx(void)
+{
+       /*
+        * When this is called, init has already been executed and released,
+        * so everything past _etext should be NX.
+        */
+       unsigned long start = PFN_ALIGN(_etext);
+       /*
+        * This comes from the is_kernel_text() upper limit, rounded up to HPAGE_SIZE where huge pages are used:
+        */
+       unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+       if (__supported_pte_mask & _PAGE_NX)
+               printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+       set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
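
The size computation above rounds the end of the init section up to a huge-page boundary (2 MiB with PAE, 4 MiB without), since the region may be mapped with huge pages. A worked sketch with illustrative addresses, assuming 2 MiB pages:

    #include <stdio.h>
    #include <stdint.h>

    #define HPAGE_SIZE (2UL << 20)		/* assume 2 MiB huge pages */
    #define HPAGE_MASK (~(HPAGE_SIZE - 1))

    int main(void)
    {
        uintptr_t etext = 0xc05f1000;	/* illustrative, page aligned */
        uintptr_t init_end = 0xc07c3000;
        uintptr_t size = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - etext;

        printf("NX-protecting %luk from %#lx\n",
               (unsigned long)(size >> 10), (unsigned long)etext);
        return 0;
    }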
+
 void mark_rodata_ro(void)
 {
        unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+       mark_nxdata_nx();
 }
 #endif
 
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
deleted file mode 100644 (file)
index 804a3b6..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * AMD K8 NUMA support.
- * Discover the memory map and associated nodes.
- *
- * This version reads it directly from the K8 northbridge.
- *
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <linux/memblock.h>
-
-#include <asm/io.h>
-#include <linux/pci_ids.h>
-#include <linux/acpi.h>
-#include <asm/types.h>
-#include <asm/mmzone.h>
-#include <asm/proto.h>
-#include <asm/e820.h>
-#include <asm/pci-direct.h>
-#include <asm/numa.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/amd_nb.h>
-
-static struct bootnode __initdata nodes[8];
-static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
-
-static __init int find_northbridge(void)
-{
-       int num;
-
-       for (num = 0; num < 32; num++) {
-               u32 header;
-
-               header = read_pci_config(0, num, 0, 0x00);
-               if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
-                       header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
-                       header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
-                       continue;
-
-               header = read_pci_config(0, num, 1, 0x00);
-               if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
-                       header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
-                       header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
-                       continue;
-               return num;
-       }
-
-       return -1;
-}
-
-static __init void early_get_boot_cpu_id(void)
-{
-       /*
-        * need to get the APIC ID of the BSP so can use that to
-        * create apicid_to_node in k8_scan_nodes()
-        */
-#ifdef CONFIG_X86_MPPARSE
-       /*
-        * get boot-time SMP configuration:
-        */
-       if (smp_found_config)
-               early_get_smp_config();
-#endif
-       early_init_lapic_mapping();
-}
-
-int __init k8_get_nodes(struct bootnode *physnodes)
-{
-       int i;
-       int ret = 0;
-
-       for_each_node_mask(i, nodes_parsed) {
-               physnodes[ret].start = nodes[i].start;
-               physnodes[ret].end = nodes[i].end;
-               ret++;
-       }
-       return ret;
-}
-
-int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-       unsigned long start = PFN_PHYS(start_pfn);
-       unsigned long end = PFN_PHYS(end_pfn);
-       unsigned numnodes;
-       unsigned long prevbase;
-       int i, nb, found = 0;
-       u32 nodeid, reg;
-
-       if (!early_pci_allowed())
-               return -1;
-
-       nb = find_northbridge();
-       if (nb < 0)
-               return nb;
-
-       pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
-
-       reg = read_pci_config(0, nb, 0, 0x60);
-       numnodes = ((reg >> 4) & 0xF) + 1;
-       if (numnodes <= 1)
-               return -1;
-
-       pr_info("Number of physical nodes %d\n", numnodes);
-
-       prevbase = 0;
-       for (i = 0; i < 8; i++) {
-               unsigned long base, limit;
-
-               base = read_pci_config(0, nb, 1, 0x40 + i*8);
-               limit = read_pci_config(0, nb, 1, 0x44 + i*8);
-
-               nodeid = limit & 7;
-               if ((base & 3) == 0) {
-                       if (i < numnodes)
-                               pr_info("Skipping disabled node %d\n", i);
-                       continue;
-               }
-               if (nodeid >= numnodes) {
-                       pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
-                               base, limit);
-                       continue;
-               }
-
-               if (!limit) {
-                       pr_info("Skipping node entry %d (base %lx)\n",
-                               i, base);
-                       continue;
-               }
-               if ((base >> 8) & 3 || (limit >> 8) & 3) {
-                       pr_err("Node %d using interleaving mode %lx/%lx\n",
-                              nodeid, (base >> 8) & 3, (limit >> 8) & 3);
-                       return -1;
-               }
-               if (node_isset(nodeid, nodes_parsed)) {
-                       pr_info("Node %d already present, skipping\n",
-                               nodeid);
-                       continue;
-               }
-
-               limit >>= 16;
-               limit <<= 24;
-               limit |= (1<<24)-1;
-               limit++;
-
-               if (limit > end)
-                       limit = end;
-               if (limit <= base)
-                       continue;
-
-               base >>= 16;
-               base <<= 24;
-
-               if (base < start)
-                       base = start;
-               if (limit > end)
-                       limit = end;
-               if (limit == base) {
-                       pr_err("Empty node %d\n", nodeid);
-                       continue;
-               }
-               if (limit < base) {
-                       pr_err("Node %d bogus settings %lx-%lx.\n",
-                              nodeid, base, limit);
-                       continue;
-               }
-
-               /* Could sort here, but pun for now. Should not happen anyroads. */
-               if (prevbase > base) {
-                       pr_err("Node map not sorted %lx,%lx\n",
-                              prevbase, base);
-                       return -1;
-               }
-
-               pr_info("Node %d MemBase %016lx Limit %016lx\n",
-                       nodeid, base, limit);
-
-               found++;
-
-               nodes[nodeid].start = base;
-               nodes[nodeid].end = limit;
-
-               prevbase = base;
-
-               node_set(nodeid, nodes_parsed);
-       }
-
-       if (!found)
-               return -1;
-       return 0;
-}
-
-int __init k8_scan_nodes(void)
-{
-       unsigned int bits;
-       unsigned int cores;
-       unsigned int apicid_base;
-       int i;
-
-       BUG_ON(nodes_empty(nodes_parsed));
-       node_possible_map = nodes_parsed;
-       memnode_shift = compute_hash_shift(nodes, 8, NULL);
-       if (memnode_shift < 0) {
-               pr_err("No NUMA node hash function found. Contact maintainer\n");
-               return -1;
-       }
-       pr_info("Using node hash shift of %d\n", memnode_shift);
-
-       /* use the coreid bits from early_identify_cpu */
-       bits = boot_cpu_data.x86_coreid_bits;
-       cores = (1<<bits);
-       apicid_base = 0;
-       /* get the APIC ID of the BSP early for systems with apicid lifting */
-       early_get_boot_cpu_id();
-       if (boot_cpu_physical_apicid > 0) {
-               pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
-               apicid_base = boot_cpu_physical_apicid;
-       }
-
-       for_each_node_mask(i, node_possible_map) {
-               int j;
-
-               memblock_x86_register_active_regions(i,
-                               nodes[i].start >> PAGE_SHIFT,
-                               nodes[i].end >> PAGE_SHIFT);
-               for (j = apicid_base; j < cores + apicid_base; j++)
-                       apicid_to_node[(i << bits) + j] = i;
-               setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-       }
-
-       numa_init_array();
-       return 0;
-}
index af3b6c8a436f7b7ec49a2366738a28faa58cdfc5..704a37cedddb59404a3c1fc773e44853b2089939 100644 (file)
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
        e->trace.entries = e->trace_entries;
        e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
        e->trace.skip = 0;
-       save_stack_trace_bp(&e->trace, regs->bp);
+       save_stack_trace_regs(&e->trace, regs);
 
        /* Round address down to nearest 16 bytes */
        shadow_copy = kmemcheck_shadow_lookup(address
index 7ffc9b727efdc95ee6748acd1a1b646c8e164ffb..7762a517d69d9233a7a6e419a5eaa9ae6c94ee53 100644 (file)
@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata;
 static char *cmdline __initdata;
 
 static int __init setup_physnodes(unsigned long start, unsigned long end,
-                                       int acpi, int k8)
+                                       int acpi, int amd)
 {
        int nr_nodes = 0;
        int ret = 0;
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
        if (acpi)
                nr_nodes = acpi_get_nodes(physnodes);
 #endif
-#ifdef CONFIG_K8_NUMA
-       if (k8)
-               nr_nodes = k8_get_nodes(physnodes);
+#ifdef CONFIG_AMD_NUMA
+       if (amd)
+               nr_nodes = amd_get_nodes(physnodes);
 #endif
        /*
         * Basic sanity checking on the physical node map: there may be errors
-        * if the SRAT or K8 incorrectly reported the topology or the mem=
+        * if the SRAT or AMD code incorrectly reported the topology or the mem=
         * kernel parameter is used.
         */
        for (i = 0; i < nr_nodes; i++) {
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
  * numa=fake command-line option.
  */
 static int __init numa_emulation(unsigned long start_pfn,
-                       unsigned long last_pfn, int acpi, int k8)
+                       unsigned long last_pfn, int acpi, int amd)
 {
        u64 addr = start_pfn << PAGE_SHIFT;
        u64 max_addr = last_pfn << PAGE_SHIFT;
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn,
        int num_nodes;
        int i;
 
-       num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
+       num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
        /*
         * If the numa=fake command-line contains a 'M' or 'G', it represents
         * the fixed node size.  Otherwise, if it is just a single number N,
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 #endif /* CONFIG_NUMA_EMU */
 
 void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
-                               int acpi, int k8)
+                               int acpi, int amd)
 {
        int i;
 
@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
        nodes_clear(node_online_map);
 
 #ifdef CONFIG_NUMA_EMU
-       if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
+       if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
        nodes_clear(node_online_map);
 #endif
 
-#ifdef CONFIG_K8_NUMA
-       if (!numa_off && k8 && !k8_scan_nodes())
+#ifdef CONFIG_AMD_NUMA
+       if (!numa_off && amd && !amd_scan_nodes())
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
index 532e7933d606fdbdde77aacdb2de746f249d2bf2..8b830ca14ac46c08facc1a848ddcb3c42c0d56cf 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
                                   unsigned long pfn)
 {
        pgprot_t forbidden = __pgprot(0);
+       pgprot_t required = __pgprot(0);
 
        /*
         * The BIOS area between 640k and 1Mb needs to be executable for
         * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
         */
-       if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+       if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_NX;
+#endif
 
        /*
         * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
        if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
                   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_RW;
+       /*
+        * .data and .bss should always be writable.
+        */
+       if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
+           within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
+               pgprot_val(required) |= _PAGE_RW;
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
        /*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 #endif
 
        prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+       prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
 
        return prot;
 }
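
The combination at the end of static_protections() first clears every forbidden bit and then forces every required bit; since the required mask is applied last, a bit that is both forbidden and required ends up set. A minimal sketch of the idiom:

    #include <stdint.h>

    /* Clear bits that must not be set, then force bits that must be
     * set; the required mask wins because it is applied last. */
    static uint64_t apply_protections(uint64_t prot, uint64_t forbidden,
                                      uint64_t required)
    {
        prot &= ~forbidden;
        prot |= required;
        return prot;
    }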
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 {
        unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
        pte_t new_pte, old_pte, *tmp;
-       pgprot_t old_prot, new_prot;
+       pgprot_t old_prot, new_prot, req_prot;
        int i, do_split = 1;
        unsigned int level;
 
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * We are safe now. Check whether the new pgprot is the same:
         */
        old_pte = *kpte;
-       old_prot = new_prot = pte_pgprot(old_pte);
+       old_prot = new_prot = req_prot = pte_pgprot(old_pte);
 
-       pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-       pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+       pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+       pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
 
        /*
         * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
        pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
        cpa->pfn = pfn;
 
-       new_prot = static_protections(new_prot, address, pfn);
+       new_prot = static_protections(req_prot, address, pfn);
 
        /*
         * We need to check the full range, whether
         * static_protection() requires a different pgprot for one of
         * the pages in the range we try to preserve:
         */
-       addr = address + PAGE_SIZE;
-       pfn++;
-       for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-               pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+       addr = address & pmask;
+       pfn = pte_pfn(old_pte);
+       for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+               pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
                if (pgprot_val(chk_prot) != pgprot_val(new_prot))
                        goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * that we limited the number of possible pages already to
         * the number of pages in the large page.
         */
-       if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+       if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
                /*
                 * The address is aligned and the number of pages
                 * covers the full page.
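
The rewritten condition tests large-page alignment directly: masking the address with pmask clears all offset bits within the large page, so the comparison holds exactly when the address sits on a large-page boundary and the request spans every small page inside it. A minimal sketch, assuming a 2 MiB large page:

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PSIZE (2UL << 20)		/* assumed large-page size */
    #define PMASK (~(PSIZE - 1))

    static bool covers_whole_large_page(uintptr_t address,
                                        unsigned long numpages)
    {
        return address == (address & PMASK) &&
               numpages == (PSIZE >> PAGE_SHIFT);
    }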
index a3250aa34086fce7d376e9e1e464fa2e996dbb6d..410531d3c292d20cde9b40a487711930eab51eb2 100644 (file)
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
 {
        if (!cpu_has_nx) {
                printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "missing in CPU or disabled in BIOS!\n");
+                      "missing in CPU!\n");
        } else {
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
                if (disable_nx) {
index a17dffd136c143898e91187cbd39d005b05779b6..f16434568a51da26ea8524ae11fa84f0c7405717 100644 (file)
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
        /* mark this node as "seen" in node bitmap */
        BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
 
+       /* don't need to check apic_id here, because it is always 8 bits */
        apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
 
        printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
index a35cb9d8b0606bc8f7123cd15f0017972a5e8dda..171a0aacb99a0874373619f4fd51ed955e2ddb9e 100644 (file)
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
        }
 
        apic_id = pa->apic_id;
+       if (apic_id >= MAX_LOCAL_APIC) {
+               printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+               return;
+       }
        apicid_to_node[apic_id] = node;
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
                apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
        else
                apic_id = pa->apic_id;
+
+       if (apic_id >= MAX_LOCAL_APIC) {
+               printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+               return;
+       }
+
        apicid_to_node[apic_id] = node;
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
index 2d49d4e19a3619c0be2c7d17a892b8aea582048f..72cbec14d783867cb5f5fa8547eb6fcee2fe28b9 100644 (file)
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
        if (!user_mode_vm(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
                if (depth)
-                       dump_trace(NULL, regs, (unsigned long *)stack, 0,
+                       dump_trace(NULL, regs, (unsigned long *)stack,
                                   &backtrace_ops, &depth);
                return;
        }
index 4e8baad36d37739e32b71da0be2932924bacfe69..358c8b9c96a79c725766e1627544486eb312a0bc 100644 (file)
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
                case 0x14:
                        cpu_type = "x86-64/family14h";
                        break;
+               case 0x15:
+                       cpu_type = "x86-64/family15h";
+                       break;
                default:
                        return -ENODEV;
                }
index e3ecb71b5790228073d5eb089f4a4fc9303ab09a..0636dd93cef8d64a718124ddfa7c5a02edd2174f 100644 (file)
@@ -58,9 +58,6 @@ static void timer_stop(void)
 
 int __init op_nmi_timer_init(struct oprofile_operations *ops)
 {
-       if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
-               return -ENODEV;
-
        ops->start = timer_start;
        ops->stop = timer_stop;
        ops->cpu_type = "timer";
index a011bcc0f94331d82c8abfa7d4afdbbd0c59eff5..c3b8e24f2b16f4f6441c286268a61ac6a320b7c3 100644 (file)
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 4
+#define NUM_COUNTERS           4
+#define NUM_COUNTERS_F15H      6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_COUNTERS      32
 #else
-#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_COUNTERS      0
 #endif
 
 #define OP_EVENT_MASK                  0x0FFF
@@ -41,7 +42,8 @@
 
 #define MSR_AMD_EVENTSEL_RESERVED      ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
-static unsigned long reset_value[NUM_VIRT_COUNTERS];
+static int num_counters;
+static unsigned long reset_value[OP_MAX_COUNTER];
 
 #define IBS_FETCH_SIZE                 6
 #define IBS_OP_SIZE                    12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
        int i;
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->counters[i].addr)
                        continue;
                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; i++) {
+       for (i = 0; i < num_counters; i++) {
                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
                        goto fail;
                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
                        goto fail;
                }
                /* both registers must be reserved */
-               msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-               msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+               if (num_counters == NUM_COUNTERS_F15H) {
+                       msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
+                       msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
+               } else {
+                       msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+                       msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+               }
                continue;
        fail:
                if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        int i;
 
        /* setup reset_value */
-       for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+       for (i = 0; i < OP_MAX_COUNTER; ++i) {
                if (counter_config[i].enabled
                    && msrs->counters[op_x86_virt_to_phys(i)].addr)
                        reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* clear all counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->controls[i].addr)
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
         * Subtle: stop on all counters to avoid race with setting our
         * pm callback
         */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
                ret = setup_ibs_ctl(i);
                if (ret)
                        return ret;
+               pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
                return 0;
        }
 
@@ -630,21 +638,29 @@ static int __init_ibs_nmi(void)
        return 0;
 }
 
-/* initialize the APIC for the IBS interrupts if available */
+/*
+ * check and reserve APIC extended interrupt LVT offset for IBS if
+ * available
+ *
+ * init_ibs() performs implicitly CPU-local operations, so pin this
+ * thread to its current CPU
+ */
+
 static void init_ibs(void)
 {
-       ibs_caps = get_ibs_caps();
+       preempt_disable();
 
+       ibs_caps = get_ibs_caps();
        if (!ibs_caps)
-               return;
+               goto out;
 
-       if (__init_ibs_nmi()) {
+       if (__init_ibs_nmi() < 0)
                ibs_caps = 0;
-               return;
-       }
+       else
+               printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 
-       printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
-              (unsigned)ibs_caps);
+out:
+       preempt_enable();
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -698,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        return 0;
 }
 
+struct op_x86_model_spec op_amd_spec;
+
 static int op_amd_init(struct oprofile_operations *ops)
 {
        init_ibs();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;
+
+       if (boot_cpu_data.x86 == 0x15) {
+               num_counters = NUM_COUNTERS_F15H;
+       } else {
+               num_counters = NUM_COUNTERS;
+       }
+
+       op_amd_spec.num_counters = num_counters;
+       op_amd_spec.num_controls = num_counters;
+       op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+
        return 0;
 }
 
 struct op_x86_model_spec op_amd_spec = {
-       .num_counters           = NUM_COUNTERS,
-       .num_controls           = NUM_COUNTERS,
-       .num_virt_counters      = NUM_VIRT_COUNTERS,
+       /* num_counters/num_controls filled in at runtime */
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,
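
On family 15h the perf MSRs are laid out as interleaved control/counter pairs rather than two contiguous banks, which is why op_amd_fill_in_addresses() steps by two. A minimal sketch of the addressing (MSR base values as in the patch):

    #include <stdint.h>

    #define MSR_F15H_PERF_CTL 0xc0010200	/* even slots: control */
    #define MSR_F15H_PERF_CTR 0xc0010201	/* odd slots: counter */

    /* Counter i uses the adjacent pair at base + 2*i. */
    static uint32_t f15h_ctl(int i) { return MSR_F15H_PERF_CTL + (i << 1); }
    static uint32_t f15h_ctr(int i) { return MSR_F15H_PERF_CTR + (i << 1); }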
index 182558dd5515add420a27dfa58304d04b6a6f71a..9fadec074142b11afcb39e73627dc4c4fd8e14dd 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
index effd96e33f16690c3dd318de9886bee75e5fd699..6b8759f7634e661de3983dbc7e6accb26e939a8c 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC)          += olpc.o
 obj-$(CONFIG_PCI_XEN)          += xen.o
 
 obj-y                          += fixup.o
+obj-$(CONFIG_X86_INTEL_CE)      += ce4100.o
 obj-$(CONFIG_ACPI)             += acpi.o
 obj-y                          += legacy.o irq.o
 
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
new file mode 100644 (file)
index 0000000..85b68ef
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ *  GPL LICENSE SUMMARY
+ *
+ *  Copyright(c) 2010 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of version 2 of the GNU General Public License as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *  The full GNU General Public License is included in this distribution
+ *  in the file called LICENSE.GPL.
+ *
+ *  Contact Information:
+ *    Intel Corporation
+ *    2200 Mission College Blvd.
+ *    Santa Clara, CA  97052
+ *
+ * This provides access methods for PCI registers that misbehave on
+ * the CE4100. Each register can be assigned a private init, read and
+ * write routine. The exception to this is the bridge device.  The
+ * bridge device is the only device on bus zero (0) that requires any
+ * fixup, so it is a special case at the moment.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <asm/pci_x86.h>
+
+struct sim_reg {
+       u32 value;
+       u32 mask;
+};
+
+struct sim_dev_reg {
+       int dev_func;
+       int reg;
+       void (*init)(struct sim_dev_reg *reg);
+       void (*read)(struct sim_dev_reg *reg, u32 *value);
+       void (*write)(struct sim_dev_reg *reg, u32 value);
+       struct sim_reg sim_reg;
+};
+
+struct sim_reg_op {
+       void (*init)(struct sim_dev_reg *reg);
+       void (*read)(struct sim_dev_reg *reg, u32 value);
+       void (*write)(struct sim_dev_reg *reg, u32 value);
+};
+
+#define MB (1024 * 1024)
+#define KB (1024)
+#define SIZE_TO_MASK(size) (~(size - 1))
+
+#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
+{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
+       {0, SIZE_TO_MASK(size)} },
+
+static void reg_init(struct sim_dev_reg *reg)
+{
+       pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
+                             &reg->sim_reg.value);
+}
+
+static void reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pci_config_lock, flags);
+       *value = reg->sim_reg.value;
+       raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void reg_write(struct sim_dev_reg *reg, u32 value)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pci_config_lock, flags);
+       reg->sim_reg.value = (value & reg->sim_reg.mask) |
+               (reg->sim_reg.value & ~reg->sim_reg.mask);
+       raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void sata_reg_init(struct sim_dev_reg *reg)
+{
+       pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
+                             &reg->sim_reg.value);
+       reg->sim_reg.value += 0x400;
+}
+
+static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+       reg_read(reg, value);
+       if (*value != reg->sim_reg.mask)
+               *value |= 0x100;
+}
+
+void sata_revid_init(struct sim_dev_reg *reg)
+{
+       reg->sim_reg.value = 0x01060100;
+       reg->sim_reg.mask = 0;
+}
+
+static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
+{
+       reg_read(reg, value);
+}
+
+static struct sim_dev_reg bus1_fixups[] = {
+       DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
+       DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+       DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x8,  0, sata_revid_init, sata_revid_read, 0)
+       DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
+       DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
+};
+
+static void __init init_sim_regs(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+               if (bus1_fixups[i].init)
+                       bus1_fixups[i].init(&bus1_fixups[i]);
+       }
+}
+
+static inline void extract_bytes(u32 *value, int reg, int len)
+{
+       uint32_t mask;
+
+       *value >>= ((reg & 3) * 8);
+       mask = 0xFFFFFFFF >> ((4 - len) * 8);
+       *value &= mask;
+}
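
extract_bytes() narrows the full 32-bit simulated register down to the byte or word the caller asked for. A standalone usage sketch with illustrative values:

    #include <stdio.h>
    #include <stdint.h>

    static void extract_bytes(uint32_t *value, int reg, int len)
    {
        *value >>= ((reg & 3) * 8);
        *value &= 0xFFFFFFFFu >> ((4 - len) * 8);
    }

    int main(void)
    {
        uint32_t v = 0x11223344;	/* dword read at offset 0x10 */

        extract_bytes(&v, 0x12, 2);	/* 16-bit access at offset 0x12 */
        printf("%#x\n", v);		/* prints 0x1122 */
        return 0;
    }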
+
+int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
+{
+       u32 av_bridge_base, av_bridge_limit;
+       int retval = 0;
+
+       switch (reg) {
+       /* Make BARs appear to not request any memory. */
+       case PCI_BASE_ADDRESS_0:
+       case PCI_BASE_ADDRESS_0 + 1:
+       case PCI_BASE_ADDRESS_0 + 2:
+       case PCI_BASE_ADDRESS_0 + 3:
+               *value = 0;
+               break;
+
+               /* The subordinate bus number register is hardwired
+                * to zero and read-only, so simulate it here.
+                */
+       case PCI_PRIMARY_BUS:
+               if (len == 4)
+                       *value = 0x00010100;
+               break;
+
+       case PCI_SUBORDINATE_BUS:
+               *value = 1;
+               break;
+
+       case PCI_MEMORY_BASE:
+       case PCI_MEMORY_LIMIT:
+               /* Get the A/V bridge base address. */
+               pci_direct_conf1.read(0, 0, devfn,
+                               PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
+
+               av_bridge_limit = av_bridge_base + (512*MB - 1);
+               av_bridge_limit >>= 16;
+               av_bridge_limit &= 0xFFF0;
+
+               av_bridge_base >>= 16;
+               av_bridge_base &= 0xFFF0;
+
+               if (reg == PCI_MEMORY_LIMIT)
+                       *value = av_bridge_limit;
+               else if (len == 2)
+                       *value = av_bridge_base;
+               else
+                       *value = (av_bridge_limit << 16) | av_bridge_base;
+               break;
+               /* Make the prefetchable memory limit smaller than the
+                * prefetchable memory base, so no prefetchable memory
+                * space is claimed.
+                */
+       case PCI_PREF_MEMORY_BASE:
+               *value = 0xFFF0;
+               break;
+       case PCI_PREF_MEMORY_LIMIT:
+               *value = 0x0;
+               break;
+               /* Make the IO limit smaller than the IO base, so no IO space is claimed. */
+       case PCI_IO_BASE:
+               *value = 0xF0;
+               break;
+       case PCI_IO_LIMIT:
+               *value = 0;
+               break;
+       default:
+               retval = 1;
+       }
+       return retval;
+}
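The base/limit encoding above is easier to follow with numbers plugged in; the BAR value below is hypothetical:

    /* Suppose the A/V bridge BAR0 reads back av_bridge_base = 0xC0000000.
     * av_bridge_limit = 0xC0000000 + (512*MB - 1) = 0xDFFFFFFF
     *   >> 16, & 0xFFF0                           -> 0xDFF0
     * av_bridge_base >> 16, & 0xFFF0              -> 0xC000
     * so a 4-byte read of PCI_MEMORY_BASE returns
     * (0xDFF0 << 16) | 0xC000 = 0xDFF0C000.
     */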
+
+static int ce4100_conf_read(unsigned int seg, unsigned int bus,
+                           unsigned int devfn, int reg, int len, u32 *value)
+{
+       int i, retval = 1;
+
+       if (bus == 1) {
+               for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+                       if (bus1_fixups[i].dev_func == devfn &&
+                           bus1_fixups[i].reg == (reg & ~3) &&
+                           bus1_fixups[i].read) {
+                               bus1_fixups[i].read(&(bus1_fixups[i]),
+                                                   value);
+                               extract_bytes(value, reg, len);
+                               return 0;
+                       }
+               }
+       }
+
+       if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
+           !bridge_read(devfn, reg, len, value))
+               return 0;
+
+       return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
+}
+
+static int ce4100_conf_write(unsigned int seg, unsigned int bus,
+                            unsigned int devfn, int reg, int len, u32 value)
+{
+       int i;
+
+       if (bus == 1) {
+               for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+                       if (bus1_fixups[i].dev_func == devfn &&
+                           bus1_fixups[i].reg == (reg & ~3) &&
+                           bus1_fixups[i].write) {
+                               bus1_fixups[i].write(&(bus1_fixups[i]),
+                                                    value);
+                               return 0;
+                       }
+               }
+       }
+
+       /* Discard writes to A/V bridge BAR. */
+       if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
+           ((reg & ~3) == PCI_BASE_ADDRESS_0))
+               return 0;
+
+       return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
+}
+
+struct pci_raw_ops ce4100_pci_conf = {
+       .read = ce4100_conf_read,
+       .write = ce4100_conf_write,
+};
+
+static int __init ce4100_pci_init(void)
+{
+       init_sim_regs();
+       raw_pci_ops = &ce4100_pci_conf;
+       return 0;
+}
+subsys_initcall(ce4100_pci_init);
index 2492d165096a2a696cf8332534966cfce2c848a7..a5f7d0d63de0def1481382f785d0fc34d553f0f6 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <asm/pci_x86.h>
 #include <asm/pci-functions.h>
+#include <asm/cacheflush.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE       (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
 #define PCIBIOS_HW_TYPE1_SPEC          0x10
 #define PCIBIOS_HW_TYPE2_SPEC          0x20
 
+int pcibios_enabled;
+
+/* According to the BIOS specification at
+ * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
+ * restrict the executable zone to a few pages and make the rest
+ * read-only. But that may be broken on some BIOSes and is complex
+ * to handle with static_protections. We could also make the
+ * 0xe0000-0x100000 range read-only and executable, but that can
+ * break some ISA mappings.
+ *
+ * So we leave an rw and x hole when pcibios is used. This shouldn't
+ * happen on modern systems with mmconfig, and if you don't want it
+ * you can disable pcibios...
+ */
+static inline void set_bios_x(void)
+{
+       pcibios_enabled = 1;
+       set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
+       if (__supported_pte_mask & _PAGE_NX)
+               printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
+}
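For scale, assuming the conventional x86 values BIOS_BEGIN = 0xa0000 and BIOS_END = 0x100000 (an assumption; they are defined elsewhere in the tree), the call above covers:

    /* (0x100000 - 0xa0000) >> PAGE_SHIFT = 0x60000 >> 12 = 96 pages,
     * i.e. the 384 KB legacy BIOS range from 640 KB to 1 MB is made
     * executable (and left writable).
     */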
+
 /*
  * This is the standard structure used to identify the entry point
  * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
                        DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
                                        bios32_entry);
                        bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+                       set_bios_x();
                        if (check_pcibios())
                                return &pci_bios_access;
                }
index 7bf70b812fa2a436ec8ce1bea270b05af2b2f487..021eee91c0562503dbb68c5bea490674d9db8618 100644 (file)
@@ -1,5 +1,7 @@
 # Platform specific code goes here
+obj-y  += ce4100/
 obj-y  += efi/
+obj-y  += iris/
 obj-y  += mrst/
 obj-y  += olpc/
 obj-y  += scx200/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644 (file)
index 0000000..91fc929
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_INTEL_CE)     += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644 (file)
index 0000000..d2c0d51
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Intel CE4100  platform specific setup code
+ *
+ * (C) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+
+#include <asm/setup.h>
+#include <asm/io.h>
+
+static int ce4100_i8042_detect(void)
+{
+       return 0;
+}
+
+static void __init sdv_find_smp_config(void)
+{
+}
+
+#ifdef CONFIG_SERIAL_8250
+
+
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+       offset = offset << p->regshift;
+       return readl(p->membase + offset);
+}
+
+/*
+ * Under some conditions the UART Tx interrupt is not raised and serial
+ * transmission hangs. This is a silicon issue that has not been root
+ * caused. As a workaround, the interrupt handler checks the
+ * UART_LSR_THRE and UART_LSR_TEMT bits of the LSR register; if at least
+ * one of them is set, the transmit request is processed. Without this
+ * workaround serial transmission may hang. This addresses erratum
+ * number 9 of the B-step errata.
+ */
+
+static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
+{
+       unsigned int ret, ier, lsr;
+
+       if (offset == UART_IIR) {
+               offset = offset << p->regshift;
+               ret = readl(p->membase + offset);
+               if (ret & UART_IIR_NO_INT) {
+                       /* see if the TX interrupt should really have been set */
+                       ier = mem_serial_in(p, UART_IER);
+                       /* see if the UART's XMIT interrupt is enabled */
+                       if (ier & UART_IER_THRI) {
+                               lsr = mem_serial_in(p, UART_LSR);
+                               /* now check to see if the UART should be
+                                  generating an interrupt (but isn't) */
+                               if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
+                                       ret &= ~UART_IIR_NO_INT;
+                       }
+               }
+       } else
+               ret = mem_serial_in(p, offset);
+       return ret;
+}
+
+static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
+{
+       offset = offset << p->regshift;
+       writel(value, p->membase + offset);
+}
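With iotype = UPIO_MEM32 and regshift = 2, each 8250 register occupies a 4-byte slot, so register N lives at membase + (N << 2). A minimal sketch of a read under that layout (illustration only; example_read_lsr is not part of the patch):

    /* UART_LSR is register 5, so with regshift = 2 it is read at +0x14 */
    static unsigned int example_read_lsr(struct uart_port *p)
    {
            return readl(p->membase + (UART_LSR << p->regshift));
    }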
+
+static void ce4100_serial_fixup(int port, struct uart_port *up,
+       unsigned short *capabilities)
+{
+#ifdef CONFIG_EARLY_PRINTK
+       /*
+        * Override the legacy port configuration that comes from
+        * asm/serial.h. Using the ioport driver and then switching to
+        * the PCI memory-mapped driver hangs the IOAPIC.
+        */
+       if (up->iotype !=  UPIO_MEM32) {
+               up->uartclk  = 14745600;
+               up->mapbase = 0xdffe0200;
+               set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
+                               up->mapbase & PAGE_MASK);
+               up->membase =
+                       (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+               up->membase += up->mapbase & ~PAGE_MASK;
+               up->iotype   = UPIO_MEM32;
+               up->regshift = 2;
+       }
+#endif
+       up->iobase = 0;
+       up->serial_in = ce4100_mem_serial_in;
+       up->serial_out = ce4100_mem_serial_out;
+
+       *capabilities |= (1 << 12);
+}
+
+static __init void sdv_serial_fixup(void)
+{
+       serial8250_set_isa_configurator(ce4100_serial_fixup);
+}
+
+#else
+static inline void sdv_serial_fixup(void) {}
+#endif
+
+static void __init sdv_arch_setup(void)
+{
+       sdv_serial_fixup();
+}
+
+/*
+ * CE4100 specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_ce4100_early_setup(void)
+{
+       x86_init.oem.arch_setup = sdv_arch_setup;
+       x86_platform.i8042_detect = ce4100_i8042_detect;
+       x86_init.resources.probe_roms = x86_init_noop;
+       x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+       x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+}
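The function above illustrates the standard x86_init override pattern: a platform replaces the default hooks with its own stubs during early boot. A minimal sketch of the same idiom for a hypothetical platform (all names below are invented):

    static void __init myplat_arch_setup(void)
    {
            /* board-specific wiring goes here */
    }

    void __init x86_myplat_early_setup(void)
    {
            x86_init.oem.arch_setup = myplat_arch_setup;
            x86_init.mpparse.find_smp_config = x86_init_noop;
    }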
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644 (file)
index 0000000..db92198
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_32_IRIS)              += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644 (file)
index 0000000..1ba7f5e
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Eurobraille/Iris power off support.
+ *
+ * Eurobraille's Iris machine is a PC with no APM or ACPI support.
+ * It is shutdown by a special I/O sequence which this module provides.
+ *
+ *  Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define IRIS_GIO_BASE          0x340
+#define IRIS_GIO_INPUT         IRIS_GIO_BASE
+#define IRIS_GIO_OUTPUT                (IRIS_GIO_BASE + 1)
+#define IRIS_GIO_PULSE         0x80 /* First byte to send */
+#define IRIS_GIO_REST          0x00 /* Second byte to send */
+#define IRIS_GIO_NODEV         0xff /* Likely not an Iris */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
+MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
+MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
+
+static int force;
+
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
+
+static void (*old_pm_power_off)(void);
+
+static void iris_power_off(void)
+{
+       outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
+       msleep(850);
+       outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
+}
+
+/*
+ * Before installing the power_off handler, try to make sure the OS is
+ * running on an Iris.  Since Iris does not support DMI, this is done
+ * by reading its input port and seeing whether the read value is
+ * meaningful.
+ */
+static int iris_init(void)
+{
+       unsigned char status;
+       if (force != 1) {
+               printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
+               return -ENODEV;
+       }
+       status = inb(IRIS_GIO_INPUT);
+       if (status == IRIS_GIO_NODEV) {
+               printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+               return -ENODEV;
+       }
+       old_pm_power_off = pm_power_off;
+       pm_power_off = &iris_power_off;
+       printk(KERN_INFO "Iris power_off handler installed.\n");
+
+       return 0;
+}
+
+static void iris_exit(void)
+{
+       pm_power_off = old_pm_power_off;
+       printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+}
+
+module_init(iris_init);
+module_exit(iris_exit);
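Since iris_init() bails out unless force=1 is passed, installing the handler requires the parameter explicitly; assuming the module is built as iris.ko, loading it would look like:

    modprobe iris force=1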
index efbbc552fa953a5bc5e70e8108ec909cb3061252..f61ccdd4934141444f1c8f27d2b3a783f06c37d0 100644 (file)
@@ -1 +1,3 @@
 obj-$(CONFIG_X86_MRST)         += mrst.o
+obj-$(CONFIG_X86_MRST)         += vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_MRST)        += early_printk_mrst.o
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
new file mode 100644 (file)
index 0000000..65df603
--- /dev/null
@@ -0,0 +1,319 @@
+/*
+ * early_printk_mrst.c - early consoles for Intel MID platforms
+ *
+ * Copyright (c) 2008-2010, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/*
+ * This file implements two early consoles, named mrst and hsu.
+ * mrst is based on the Maxim MAX3110 SPI-UART device and exists on
+ * both the Moorestown and Medfield platforms, while hsu is based on
+ * a High Speed UART device which only exists on the Medfield platform.
+ */
+
+#include <linux/serial_reg.h>
+#include <linux/serial_mfd.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/mrst.h>
+
+#define MRST_SPI_TIMEOUT               0x200000
+#define MRST_REGBASE_SPI0              0xff128000
+#define MRST_REGBASE_SPI1              0xff128400
+#define MRST_CLK_SPI0_REG              0xff11d86c
+
+/* Bit fields in CTRLR0 */
+#define SPI_DFS_OFFSET                 0
+
+#define SPI_FRF_OFFSET                 4
+#define SPI_FRF_SPI                    0x0
+#define SPI_FRF_SSP                    0x1
+#define SPI_FRF_MICROWIRE              0x2
+#define SPI_FRF_RESV                   0x3
+
+#define SPI_MODE_OFFSET                        6
+#define SPI_SCPH_OFFSET                        6
+#define SPI_SCOL_OFFSET                        7
+#define SPI_TMOD_OFFSET                        8
+#define        SPI_TMOD_TR                     0x0             /* xmit & recv */
+#define SPI_TMOD_TO                    0x1             /* xmit only */
+#define SPI_TMOD_RO                    0x2             /* recv only */
+#define SPI_TMOD_EPROMREAD             0x3             /* eeprom read mode */
+
+#define SPI_SLVOE_OFFSET               10
+#define SPI_SRL_OFFSET                 11
+#define SPI_CFS_OFFSET                 12
+
+/* Bit fields in SR, 7 bits */
+#define SR_MASK                                0x7f            /* cover 7 bits */
+#define SR_BUSY                                (1 << 0)
+#define SR_TF_NOT_FULL                 (1 << 1)
+#define SR_TF_EMPT                     (1 << 2)
+#define SR_RF_NOT_EMPT                 (1 << 3)
+#define SR_RF_FULL                     (1 << 4)
+#define SR_TX_ERR                      (1 << 5)
+#define SR_DCOL                                (1 << 6)
+
+struct dw_spi_reg {
+       u32     ctrl0;
+       u32     ctrl1;
+       u32     ssienr;
+       u32     mwcr;
+       u32     ser;
+       u32     baudr;
+       u32     txfltr;
+       u32     rxfltr;
+       u32     txflr;
+       u32     rxflr;
+       u32     sr;
+       u32     imr;
+       u32     isr;
+       u32     risr;
+       u32     txoicr;
+       u32     rxoicr;
+       u32     rxuicr;
+       u32     msticr;
+       u32     icr;
+       u32     dmacr;
+       u32     dmatdlr;
+       u32     dmardlr;
+       u32     idr;
+       u32     version;
+
+       /* Currently operates as 32 bits, though only the low 16 bits matter */
+       u32     dr;
+} __packed;
+
+#define dw_readl(dw, name)             __raw_readl(&(dw)->name)
+#define dw_writel(dw, name, val)       __raw_writel((val), &(dw)->name)
+
+/* Use the SPI0 registers for mrst by default; if Penwell is detected, use SPI1 */
+static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
+
+static u32 *pclk_spi0;
+/* Always contains an accessible address once initialized; starts as 0 */
+static struct dw_spi_reg *pspi;
+
+static struct kmsg_dumper dw_dumper;
+static int dumper_registered;
+
+static void dw_kmsg_dump(struct kmsg_dumper *dumper,
+                       enum kmsg_dump_reason reason,
+                       const char *s1, unsigned long l1,
+                       const char *s2, unsigned long l2)
+{
+       int i;
+
+       /* If we end up here, re-init the HW to be safe */
+       mrst_early_console_init();
+
+       for (i = 0; i < l1; i++)
+               early_mrst_console.write(&early_mrst_console, s1 + i, 1);
+       for (i = 0; i < l2; i++)
+               early_mrst_console.write(&early_mrst_console, s2 + i, 1);
+}
+
+/* Set the baud rate to 115200, 8n1, IRQs disabled */
+static void max3110_write_config(void)
+{
+       u16 config;
+
+       config = 0xc001;
+       dw_writel(pspi, dr, config);
+}
+
+/* Translate the char to an eligible word and send it to the max3110 */
+static void max3110_write_data(char c)
+{
+       u16 data;
+
+       data = 0x8000 | c;
+       dw_writel(pspi, dr, data);
+}
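For reference, the two MAX3110 frame formats used above decompose as follows (a datasheet-level illustration; the character is hypothetical):

    /* 16-bit MAX3110 frames; the top two bits select the command:
     * write-config: 0xC000 | config  ->  0xC001 here (115200, 8n1, IRQs off)
     * write-data:   0x8000 | char    ->  sending 'A' (0x41) puts 0x8041
     *                                    on the SPI bus
     */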
+
+void mrst_early_console_init(void)
+{
+       u32 ctrlr0 = 0;
+       u32 spi0_cdiv;
+       u32 freq; /* Frequency info only needs to be read once */
+
+       /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
+       pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
+                                                       MRST_CLK_SPI0_REG);
+       spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
+       freq = 100000000 / (spi0_cdiv + 1);
+
+       if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
+               mrst_spi_paddr = MRST_REGBASE_SPI1;
+
+       pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
+                                               mrst_spi_paddr);
+
+       /* Disable SPI controller */
+       dw_writel(pspi, ssienr, 0);
+
+       /* Set control param, 8 bits, transmit only mode */
+       ctrlr0 = dw_readl(pspi, ctrl0);
+
+       ctrlr0 &= 0xfcc0;
+       ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
+                     | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
+       dw_writel(pspi, ctrl0, ctrlr0);
+
+       /*
+        * Change the spi0 clock to comply with 115200 bps; use 100000 to
+        * calculate the clock divider so that the clock is a little slower
+        * than the real baud rate.
+        */
+       dw_writel(pspi, baudr, freq/100000);
+
+       /* Disable all INT for early phase */
+       dw_writel(pspi, imr, 0x0);
+
+       /* Set the cs to spi-uart */
+       dw_writel(pspi, ser, 0x2);
+
+       /* Enable the HW, the last step for HW init */
+       dw_writel(pspi, ssienr, 0x1);
+
+       /* Set the default configuration */
+       max3110_write_config();
+
+       /* Register the kmsg dumper */
+       if (!dumper_registered) {
+               dw_dumper.dump = dw_kmsg_dump;
+               kmsg_dump_register(&dw_dumper);
+               dumper_registered = 1;
+       }
+}
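A worked example of the divider math (the cdiv value is hypothetical):

    /* If spi0_cdiv = 4: freq = 100 MHz / (4 + 1) = 20 MHz, and
     * baudr = 20000000 / 100000 = 200, so the SPI clock becomes
     * 20 MHz / 200 = 100 kHz -- deliberately a little below the
     * 115200 bps target, as the comment in the function explains.
     */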
+
+/* Slave select should be called in the read/write function */
+static void early_mrst_spi_putc(char c)
+{
+       unsigned int timeout;
+       u32 sr;
+
+       timeout = MRST_SPI_TIMEOUT;
+       /* Early putc needs to make sure the TX FIFO is not full */
+       while (--timeout) {
+               sr = dw_readl(pspi, sr);
+               if (!(sr & SR_TF_NOT_FULL))
+                       cpu_relax();
+               else
+                       break;
+       }
+
+       if (!timeout)
+               pr_warning("MRST earlycon: timed out\n");
+       else
+               max3110_write_data(c);
+}
+
+/* Early SPI only uses polling mode */
+static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
+{
+       int i;
+
+       for (i = 0; i < n && *str; i++) {
+               if (*str == '\n')
+                       early_mrst_spi_putc('\r');
+               early_mrst_spi_putc(*str);
+               str++;
+       }
+}
+
+struct console early_mrst_console = {
+       .name =         "earlymrst",
+       .write =        early_mrst_spi_write,
+       .flags =        CON_PRINTBUFFER,
+       .index =        -1,
+};
+
+/*
+ * The following is the early console based on the Medfield HSU
+ * (High Speed UART) device.
+ */
+#define HSU_PORT2_PADDR                0xffa28180
+
+static void __iomem *phsu;
+
+void hsu_early_console_init(void)
+{
+       u8 lcr;
+
+       phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
+                                                       HSU_PORT2_PADDR);
+
+       /* Disable FIFO */
+       writeb(0x0, phsu + UART_FCR);
+
+       /* Set to default 115200 bps, 8n1 */
+       lcr = readb(phsu + UART_LCR);
+       writeb((0x80 | lcr), phsu + UART_LCR);
+       writeb(0x18, phsu + UART_DLL);
+       writeb(lcr,  phsu + UART_LCR);
+       writel(0x3600, phsu + UART_MUL*4);
+
+       writeb(0x8, phsu + UART_MCR);
+       writeb(0x7, phsu + UART_FCR);
+       writeb(0x3, phsu + UART_LCR);
+
+       /* Clear IRQ status */
+       readb(phsu + UART_LSR);
+       readb(phsu + UART_RX);
+       readb(phsu + UART_IIR);
+       readb(phsu + UART_MSR);
+
+       /* Enable FIFO */
+       writeb(0x7, phsu + UART_FCR);
+}
+
+#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
+
+static void early_hsu_putc(char ch)
+{
+       unsigned int timeout = 10000; /* 10ms */
+       u8 status;
+
+       while (--timeout) {
+               status = readb(phsu + UART_LSR);
+               if (status & BOTH_EMPTY)
+                       break;
+               udelay(1);
+       }
+
+       /* Only write the char when there was no timeout */
+       if (timeout)
+               writeb(ch, phsu + UART_TX);
+}
+
+static void early_hsu_write(struct console *con, const char *str, unsigned n)
+{
+       int i;
+
+       for (i = 0; i < n && *str; i++) {
+               if (*str == '\n')
+                       early_hsu_putc('\r');
+               early_hsu_putc(*str);
+               str++;
+       }
+}
+
+struct console early_hsu_console = {
+       .name =         "earlyhsu",
+       .write =        early_hsu_write,
+       .flags =        CON_PRINTBUFFER,
+       .index =        -1,
+};
index 79ae68154e871fe208ff5fcfd809daf3b03c4ab5..fee0b4914e07ad494f82629ccc10dc0a1980879c 100644 (file)
@@ -9,9 +9,19 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
+
+#define pr_fmt(fmt) "mrst: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 
@@ -23,7 +33,9 @@
 #include <asm/mrst.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/apb_timer.h>
+#include <asm/reboot.h>
 
 /*
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
                memcpy(sfi_mtimer_array, pentry, totallen);
        }
 
-       printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
+       pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
        pentry = sfi_mtimer_array;
        for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
-               printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
+               pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
                        " irq = %d\n", totallen, (u32)pentry->phys_addr,
                        pentry->freq_hz, pentry->irq);
                        if (!pentry->irq)
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
                memcpy(sfi_mrtc_array, pentry, totallen);
        }
 
-       printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
+       pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
        pentry = sfi_mrtc_array;
        for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
-               printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
+               pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
                        totallen, (u32)pentry->phys_addr, pentry->irq);
                mp_irq.type = MP_IOAPIC;
                mp_irq.irqtype = mp_INT;
-               mp_irq.irqflag = 0;
+               mp_irq.irqflag = 0xf;   /* level trigger and active low */
                mp_irq.srcbus = 0;
                mp_irq.srcbusirq = pentry->irq; /* IRQ */
                mp_irq.dstapic = MP_APIC_ALL;
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
 
 void __init mrst_time_init(void)
 {
+       sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
        switch (mrst_timer_options) {
        case MRST_TIMER_APBT_ONLY:
                break;
@@ -224,16 +237,10 @@ void __init mrst_time_init(void)
                return;
        }
        /* we need at least one APB timer */
-       sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
        pre_init_apic_IRQ0();
        apbt_time_init();
 }
 
-void __init mrst_rtc_init(void)
-{
-       sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
-}
-
 void __cpuinit mrst_arch_setup(void)
 {
        if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void)
        return 0;
 }
 
+/* Reboot and power off are handled by the SCU on a MID device */
+static void mrst_power_off(void)
+{
+       intel_scu_ipc_simple_command(0xf1, 1);
+}
+
+static void mrst_reboot(void)
+{
+       intel_scu_ipc_simple_command(0xf1, 0);
+}
+
 /*
  * Moorestown specific x86_init function overrides and early setup
  * calls.
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void)
 
        legacy_pic = &null_legacy_pic;
 
+       /* Moorestown specific power_off/restart method */
+       pm_power_off = mrst_power_off;
+       machine_ops.emergency_restart  = mrst_reboot;
+
        /* Avoid searching for BIOS MP tables */
        x86_init.mpparse.find_smp_config = x86_init_noop;
        x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
        return 0;
 }
 __setup("x86_mrst_timer=", setup_x86_mrst_timer);
+
+/*
+ * Parse the GPIO table first, since the DEVS table will need it
+ * to map pin names to actual pins.
+ */
+static struct sfi_gpio_table_entry *gpio_table;
+static int gpio_num_entry;
+
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+       struct sfi_table_simple *sb;
+       struct sfi_gpio_table_entry *pentry;
+       int num, i;
+
+       if (gpio_table)
+               return 0;
+       sb = (struct sfi_table_simple *)table;
+       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+       pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+       gpio_table = (struct sfi_gpio_table_entry *)
+                               kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+       if (!gpio_table)
+               return -1;
+       memcpy(gpio_table, pentry, num * sizeof(*pentry));
+       gpio_num_entry = num;
+
+       pr_debug("GPIO pin info:\n");
+       for (i = 0; i < num; i++, pentry++)
+               pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+               " pin = %d\n", i,
+                       pentry->controller_name,
+                       pentry->pin_name,
+                       pentry->pin_no);
+       return 0;
+}
+
+static int get_gpio_by_name(const char *name)
+{
+       struct sfi_gpio_table_entry *pentry = gpio_table;
+       int i;
+
+       if (!pentry)
+               return -1;
+       for (i = 0; i < gpio_num_entry; i++, pentry++) {
+               if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+                       return pentry->pin_no;
+       }
+       return -1;
+}
+
+/*
+ * This defines the array of devices' platform data that IAFW exports
+ * through the SFI "DEVS" table; name and type are used to match a
+ * device to its platform data.
+ */
+struct devs_id {
+       char name[SFI_NAME_LEN + 1];
+       u8 type;
+       u8 delay;
+       void *(*get_platform_data)(void *info);
+};
+
+/* the offset used to map a global gpio pin number to its irq */
+#define MRST_IRQ_OFFSET 0x100
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+       static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+       int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+       if (gpio_base == -1)
+               gpio_base = 64;
+       pmic_gpio_pdata.gpio_base = gpio_base;
+       pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
+       pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+       return &pmic_gpio_pdata;
+}
+
+static void __init *max3111_platform_data(void *info)
+{
+       struct spi_board_info *spi_info = info;
+       int intr = get_gpio_by_name("max3111_int");
+
+       if (intr == -1)
+               return NULL;
+       spi_info->irq = intr + MRST_IRQ_OFFSET;
+       return NULL;
+}
+
+/* we have multiple max7315 on the board ... */
+#define MAX7315_NUM 2
+static void __init *max7315_platform_data(void *info)
+{
+       static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+       static int nr;
+       struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+       struct i2c_board_info *i2c_info = info;
+       int gpio_base, intr;
+       char base_pin_name[SFI_NAME_LEN + 1];
+       char intr_pin_name[SFI_NAME_LEN + 1];
+
+       if (nr == MAX7315_NUM) {
+               pr_err("too many max7315s, we only support %d\n",
+                               MAX7315_NUM);
+               return NULL;
+       }
+       /* we have several max7315s on the board; we only need to load
+        * several instances of the same pca953x driver to cover them
+        */
+       strcpy(i2c_info->type, "max7315");
+       if (nr++) {
+               sprintf(base_pin_name, "max7315_%d_base", nr);
+               sprintf(intr_pin_name, "max7315_%d_int", nr);
+       } else {
+               strcpy(base_pin_name, "max7315_base");
+               strcpy(intr_pin_name, "max7315_int");
+       }
+
+       gpio_base = get_gpio_by_name(base_pin_name);
+       intr = get_gpio_by_name(intr_pin_name);
+
+       if (gpio_base == -1)
+               return NULL;
+       max7315->gpio_base = gpio_base;
+       if (intr != -1) {
+               i2c_info->irq = intr + MRST_IRQ_OFFSET;
+               max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
+       } else {
+               i2c_info->irq = -1;
+               max7315->irq_base = -1;
+       }
+       return max7315;
+}
+
+static void __init *emc1403_platform_data(void *info)
+{
+       static short intr2nd_pdata;
+       struct i2c_board_info *i2c_info = info;
+       int intr = get_gpio_by_name("thermal_int");
+       int intr2nd = get_gpio_by_name("thermal_alert");
+
+       if (intr == -1 || intr2nd == -1)
+               return NULL;
+
+       i2c_info->irq = intr + MRST_IRQ_OFFSET;
+       intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+       return &intr2nd_pdata;
+}
+
+static void __init *lis331dl_platform_data(void *info)
+{
+       static short intr2nd_pdata;
+       struct i2c_board_info *i2c_info = info;
+       int intr = get_gpio_by_name("accel_int");
+       int intr2nd = get_gpio_by_name("accel_2");
+
+       if (intr == -1 || intr2nd == -1)
+               return NULL;
+
+       i2c_info->irq = intr + MRST_IRQ_OFFSET;
+       intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+       return &intr2nd_pdata;
+}
+
+static void __init *no_platform_data(void *info)
+{
+       return NULL;
+}
+
+static const struct devs_id __initconst device_ids[] = {
+       {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+       {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
+       {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+       {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+       {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
+       {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
+       {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+       {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+       {},
+};
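Extending the table follows the same shape; a hypothetical entry (the name and handler below are invented for illustration) would read:

    /* hypothetical -- "i2c_foo" is not a real SFI device name: */
    {"i2c_foo", SFI_DEV_TYPE_I2C, 0, &no_platform_data},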
+
+#define MAX_IPCDEVS    24
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static int ipc_next_dev;
+
+#define MAX_SCU_SPI    24
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static int spi_next_dev;
+
+#define MAX_SCU_I2C    24
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static int i2c_bus[MAX_SCU_I2C];
+static int i2c_next_dev;
+
+static void __init intel_scu_device_register(struct platform_device *pdev)
+{
+       if (ipc_next_dev == MAX_IPCDEVS)
+               pr_err("too many SCU IPC devices\n");
+       else
+               ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+       struct spi_board_info *new_dev;
+
+       if (spi_next_dev == MAX_SCU_SPI) {
+               pr_err("too many SCU SPI devices\n");
+               return;
+       }
+
+       new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+       if (!new_dev) {
+               pr_err("failed to alloc mem for delayed spi dev %s\n",
+                       sdev->modalias);
+               return;
+       }
+       memcpy(new_dev, sdev, sizeof(*sdev));
+
+       spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+                                               struct i2c_board_info *idev)
+{
+       struct i2c_board_info *new_dev;
+
+       if (i2c_next_dev == MAX_SCU_I2C) {
+               pr_err("too many SCU I2C devices\n");
+               return;
+       }
+
+       new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+       if (!new_dev) {
+               pr_err("failed to alloc mem for delayed i2c dev %s\n",
+                       idev->type);
+               return;
+       }
+       memcpy(new_dev, idev, sizeof(*idev));
+
+       i2c_bus[i2c_next_dev] = bus;
+       i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+       int i;
+
+       for (i = 0; i < ipc_next_dev; i++)
+               platform_device_add(ipc_devs[i]);
+
+       for (i = 0; i < spi_next_dev; i++)
+               spi_register_board_info(spi_devs[i], 1);
+
+       for (i = 0; i < i2c_next_dev; i++) {
+               struct i2c_adapter *adapter;
+               struct i2c_client *client;
+
+               adapter = i2c_get_adapter(i2c_bus[i]);
+               if (adapter) {
+                       client = i2c_new_device(adapter, i2c_devs[i]);
+                       if (!client)
+                               pr_err("can't create i2c device %s\n",
+                                       i2c_devs[i]->type);
+               } else
+                       i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+       }
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+       int i;
+
+       for (i = 0; i < ipc_next_dev; i++)
+               platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+       /* Single threaded */
+       static struct resource __initdata res = {
+               .name = "IRQ",
+               .flags = IORESOURCE_IRQ,
+       };
+       res.start = irq;
+       platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_IPC &&
+                       !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(pdev);
+                       break;
+               }
+               dev++;
+       }
+       pdev->dev.platform_data = pdata;
+       intel_scu_device_register(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_SPI &&
+                               !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(spi_info);
+                       break;
+               }
+               dev++;
+       }
+       spi_info->platform_data = pdata;
+       if (dev->delay)
+               intel_scu_spi_device_register(spi_info);
+       else
+               spi_register_board_info(spi_info, 1);
+}
+
+static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_I2C &&
+                       !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(i2c_info);
+                       break;
+               }
+               dev++;
+       }
+       i2c_info->platform_data = pdata;
+
+       if (dev->delay)
+               intel_scu_i2c_device_register(bus, i2c_info);
+       else
+               i2c_register_board_info(bus, i2c_info, 1);
+}
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+       struct sfi_table_simple *sb;
+       struct sfi_device_table_entry *pentry;
+       struct spi_board_info spi_info;
+       struct i2c_board_info i2c_info;
+       struct platform_device *pdev;
+       int num, i, bus;
+       int ioapic;
+       struct io_apic_irq_attr irq_attr;
+
+       sb = (struct sfi_table_simple *)table;
+       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+       pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+       for (i = 0; i < num; i++, pentry++) {
+               if (pentry->irq != (u8)0xff) { /* native RTE case */
+                       /* these SPI2 devices are not exposed to the system as
+                        * PCI devices, but they have separate RTE entries in
+                        * the IOAPIC, so we have to enable them one by one here
+                        */
+                       ioapic = mp_find_ioapic(pentry->irq);
+                       irq_attr.ioapic = ioapic;
+                       irq_attr.ioapic_pin = pentry->irq;
+                       irq_attr.trigger = 1;
+                       irq_attr.polarity = 1;
+                       io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+               }
+               switch (pentry->type) {
+               case SFI_DEV_TYPE_IPC:
+                       /* ID as IRQ is a hack that will go away */
+                       pdev = platform_device_alloc(pentry->name, pentry->irq);
+                       if (pdev == NULL) {
+                               pr_err("out of memory for SFI platform device '%s'.\n",
+                                                       pentry->name);
+                               continue;
+                       }
+                       install_irq_resource(pdev, pentry->irq);
+                       pr_debug("info[%2d]: IPC bus, name = %16.16s, "
+                               "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+                       sfi_handle_ipc_dev(pdev);
+                       break;
+               case SFI_DEV_TYPE_SPI:
+                       memset(&spi_info, 0, sizeof(spi_info));
+                       strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+                       spi_info.irq = pentry->irq;
+                       spi_info.bus_num = pentry->host_num;
+                       spi_info.chip_select = pentry->addr;
+                       spi_info.max_speed_hz = pentry->max_freq;
+                       pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
+                               "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
+                               spi_info.bus_num,
+                               spi_info.modalias,
+                               spi_info.irq,
+                               spi_info.max_speed_hz,
+                               spi_info.chip_select);
+                       sfi_handle_spi_dev(&spi_info);
+                       break;
+               case SFI_DEV_TYPE_I2C:
+                       memset(&i2c_info, 0, sizeof(i2c_info));
+                       bus = pentry->host_num;
+                       strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+                       i2c_info.irq = pentry->irq;
+                       i2c_info.addr = pentry->addr;
+                       pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
+                               "irq = 0x%2x, addr = 0x%x\n", i, bus,
+                               i2c_info.type,
+                               i2c_info.irq,
+                               i2c_info.addr);
+                       sfi_handle_i2c_dev(bus, &i2c_info);
+                       break;
+               case SFI_DEV_TYPE_UART:
+               case SFI_DEV_TYPE_HSI:
+               default:
+                       ;
+               }
+       }
+       return 0;
+}
+
+static int __init mrst_platform_init(void)
+{
+       sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+       sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+       return 0;
+}
+arch_initcall(mrst_platform_init);
+
+/*
+ * We will look these buttons up in the SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible buttons
+ * here; those without a matching GPIO are dropped.
+ */
+static struct gpio_keys_button gpio_button[] = {
+       {KEY_POWER,             -1, 1, "power_btn",     EV_KEY, 0, 3000},
+       {KEY_PROG1,             -1, 1, "prog_btn1",     EV_KEY, 0, 20},
+       {KEY_PROG2,             -1, 1, "prog_btn2",     EV_KEY, 0, 20},
+       {SW_LID,                -1, 1, "lid_switch",    EV_SW,  0, 20},
+       {KEY_VOLUMEUP,          -1, 1, "vol_up",        EV_KEY, 0, 20},
+       {KEY_VOLUMEDOWN,        -1, 1, "vol_down",      EV_KEY, 0, 20},
+       {KEY_CAMERA,            -1, 1, "camera_full",   EV_KEY, 0, 20},
+       {KEY_CAMERA_FOCUS,      -1, 1, "camera_half",   EV_KEY, 0, 20},
+       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw1",        EV_SW,  0, 20},
+       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw2",        EV_SW,  0, 20},
+};
+
+static struct gpio_keys_platform_data mrst_gpio_keys = {
+       .buttons        = gpio_button,
+       .rep            = 1,
+       .nbuttons       = -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+       .name           = "gpio-keys",
+       .id             = -1,
+       .dev            = {
+               .platform_data  = &mrst_gpio_keys,
+       },
+};
+
+/*
+ * Drop the non-existent buttons and register the gpio-keys
+ * device if any remain.
+ */
+static int __init pb_keys_init(void)
+{
+       struct gpio_keys_button *gb = gpio_button;
+       int i, num, good = 0;
+
+       num = ARRAY_SIZE(gpio_button);
+       for (i = 0; i < num; i++) {
+               gb[i].gpio = get_gpio_by_name(gb[i].desc);
+               if (gb[i].gpio == -1)
+                       continue;
+
+               if (i != good)
+                       gb[good] = gb[i];
+               good++;
+       }
+
+       if (good) {
+               mrst_gpio_keys.nbuttons = good;
+               return platform_device_register(&pb_device);
+       }
+       return 0;
+}
+late_initcall(pb_keys_init);
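To make the shrink loop concrete (the GPIO assignments below are hypothetical): if only power_btn and vol_up are present in the SFI GPIO table, the loop compacts them to the front of gpio_button[]:

    /* Hypothetical outcome: power_btn -> gpio 3, vol_up -> gpio 7,
     * every other lookup returns -1. After the loop, gb[0] = power_btn,
     * gb[1] = vol_up and good = 2, so mrst_gpio_keys.nbuttons = 2 and
     * pb_device is registered with just those two buttons.
     */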
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644 (file)
index 0000000..32cd7ed
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * vrtc.c: Driver for virtual RTC device on Intel MID platform
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * The vRTC is emulated by the system controller firmware; the real HW
+ * RTC is located in the PMIC device. The SCU FW shadows the PMIC RTC
+ * in a memory-mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based on RTC CMOS driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+#include <asm/time.h>
+#include <asm/fixmap.h>
+
+static unsigned char __iomem *vrtc_virt_base;
+
+unsigned char vrtc_cmos_read(unsigned char reg)
+{
+       unsigned char retval;
+
+       /* vRTC's registers range from 0x0 to 0xD */
+       if (reg > 0xd || !vrtc_virt_base)
+               return 0xff;
+
+       lock_cmos_prefix(reg);
+       retval = __raw_readb(vrtc_virt_base + (reg << 2));
+       lock_cmos_suffix(reg);
+       return retval;
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_read);
+
+void vrtc_cmos_write(unsigned char val, unsigned char reg)
+{
+       if (reg > 0xd || !vrtc_virt_base)
+               return;
+
+       lock_cmos_prefix(reg);
+       __raw_writeb(val, vrtc_virt_base + (reg << 2));
+       lock_cmos_suffix(reg);
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_write);
+
+unsigned long vrtc_get_time(void)
+{
+       u8 sec, min, hour, mday, mon;
+       u32 year;
+
+       while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
+               cpu_relax();
+
+       sec = vrtc_cmos_read(RTC_SECONDS);
+       min = vrtc_cmos_read(RTC_MINUTES);
+       hour = vrtc_cmos_read(RTC_HOURS);
+       mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+       mon = vrtc_cmos_read(RTC_MONTH);
+       year = vrtc_cmos_read(RTC_YEAR);
+
+       /* vRTC YEAR reg contains the offset to 1960 */
+       year += 1960;
+
+       printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
+               "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
+
+       return mktime(year, mon, mday, hour, min, sec);
+}
+
+/* Only care about the minutes and seconds */
+int vrtc_set_mmss(unsigned long nowtime)
+{
+       int real_sec, real_min;
+       int vrtc_min;
+
+       vrtc_min = vrtc_cmos_read(RTC_MINUTES);
+
+       real_sec = nowtime % 60;
+       real_min = nowtime / 60;
+       if (((abs(real_min - vrtc_min) + 15)/30) & 1)
+               real_min += 30;
+       real_min %= 60;
+
+       vrtc_cmos_write(real_sec, RTC_SECONDS);
+       vrtc_cmos_write(real_min, RTC_MINUTES);
+       return 0;
+}
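The parity test above copes with RTCs kept in a half-hour-offset local time; a worked example (numbers hypothetical):

    /* Say vrtc_min = 5 and system time is at minute 35 of the hour, so
     * real_min = 60*k + 35. Then (abs(real_min - 5) + 15) / 30 = 2*k + 1,
     * which is odd, so 30 is added and real_min %= 60 yields 5: the
     * minutes are written back in the RTC's own half-hour frame.
     */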
+
+void __init mrst_rtc_init(void)
+{
+       unsigned long rtc_paddr;
+       void __iomem *virt_base;
+
+       sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
+       if (!sfi_mrtc_num)
+               return;
+
+       rtc_paddr = sfi_mrtc_array[0].phys_addr;
+
+       /* vRTC's register address may not be page aligned */
+       set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
+
+       virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
+       virt_base += rtc_paddr & ~PAGE_MASK;
+       vrtc_virt_base = virt_base;
+
+       x86_platform.get_wallclock = vrtc_get_time;
+       x86_platform.set_wallclock = vrtc_set_mmss;
+}
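Since the vRTC registers need not be page aligned, the sub-page offset is added back after the fixmap mapping; with a made-up address:

    /* If rtc_paddr = 0xff103f20, set_fixmap_nocache() maps the page at
     * 0xff103000, and the driver then uses
     *   vrtc_virt_base = __fix_to_virt(FIX_LNW_VRTC) + 0xf20
     * so reads and writes land on the right byte within the page.
     */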
+
+/*
+ * The Moorestown platform has a memory mapped virtual RTC device that emulates
+ * the programming interface of the RTC.
+ */
+
+static struct resource vrtc_resources[] = {
+       [0] = {
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static struct platform_device vrtc_device = {
+       .name           = "rtc_mrst",
+       .id             = -1,
+       .resource       = vrtc_resources,
+       .num_resources  = ARRAY_SIZE(vrtc_resources),
+};
+
+/* Register the RTC device if appropriate */
+static int __init mrst_device_create(void)
+{
+       /* No Moorestown, no device */
+       if (!mrst_identify_cpu())
+               return -ENODEV;
+       /* No RTC table entries, no device */
+       if (!sfi_mrtc_num)
+               return -ENODEV;
+
+       /* iomem resource */
+       vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
+       vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
+                               MRST_VRTC_MAP_SZ - 1; /* resource ends are inclusive */
+       /* irq resource */
+       vrtc_resources[1].start = sfi_mrtc_array[0].irq;
+       vrtc_resources[1].end = sfi_mrtc_array[0].irq;
+
+       return platform_device_register(&vrtc_device);
+}
+
+module_init(mrst_device_create);
index dd4c281ffe5720c3ff15f1eceaa09759e17df7d1..ca54875ac795117079b7a9521bfd0bf42bf9f980 100644 (file)
@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address)
 /* All CPUs enumerated by SFI must be present and enabled */
 static void __cpuinit mp_sfi_register_lapic(u8 id)
 {
-       if (MAX_APICS - id <= 0) {
+       if (MAX_LOCAL_APIC - id <= 0) {
                pr_warning("Processor #%d invalid (max %d)\n",
-                       id, MAX_APICS);
+                       id, MAX_LOCAL_APIC);
                return;
        }
 
index ba9caa808a9c1b42c6a616968c57e96769039314..df58e9cad96ae9441a4f86f22900a6e0bf05aa64 100644 (file)
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode)
 
        /*
         * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
-        * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
+        * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
         */
        bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
                                * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static void __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs)
 {
        int i;
        int cpu;
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
        struct bau_control *smaster = NULL;
        struct socket_desc {
                short num_cpus;
-               short cpu_number[16];
+               short cpu_number[MAX_CPUS_PER_SOCKET];
        };
        struct uvhub_desc {
                unsigned short socket_mask;
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs)
                sdp = &bdp->socket[socket];
                sdp->cpu_number[sdp->num_cpus] = cpu;
                sdp->num_cpus++;
+               if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+                       printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+                       return 1;
+               }
        }
        for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
                if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs)
                                bcp->uvhub_master = hmaster;
                                bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
                                                blade_processor_id;
+                               if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+                                       printk(KERN_EMERG
+                                               "%d cpus per uvhub invalid\n",
+                                               bcp->uvhub_cpu);
+                                       return 1;
+                               }
                        }
 nextsocket:
                        socket++;
@@ -1595,6 +1605,7 @@ nextsocket:
                bcp->congested_reps = congested_reps;
                bcp->congested_period = congested_period;
        }
+       return 0;
 }
 
 /*
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void)
        spin_lock_init(&disable_lock);
        congested_cycles = microsec_2_cycles(congested_response_us);
 
-       uv_init_per_cpu(nuvhubs);
+       if (uv_init_per_cpu(nuvhubs)) {
+               nobau = 1;
+               return 0;
+       }
 
        uv_partition_base_pnode = 0x7fffffff;
        for (uvhub = 0; uvhub < nuvhubs; uvhub++)
index 3371bd053b89f29e14e5f55d88929d2427f123b2..63203767174683b3db1ab410be9263aabe280ab0 100644 (file)
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
        ver = m->apicver;
        if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
                printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-                       m->apicid, MAX_APICS);
+                       m->apicid, MAX_LOCAL_APIC);
                return;
        }
 
index 2c7def95f721c6d08596c9ce852a3ac4a1c54e38..4c8dea513b66adeea7881d7f541237c812e5bf14 100644 (file)
@@ -408,6 +408,9 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
                return_ACPI_STATUS(AE_OK);
        }
 
+       /* Disable the GPE in case it's been enabled already. */
+       (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+
        /*
         * Add the GPE information from above to the gpe_event_info block for
         * use during dispatch of this GPE.
index 660a2728908d6e15f8abbe5b2eae43b4a91df503..0cac7ec0d2ece0764806fae7a11994da0520ab23 100644 (file)
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
         * as possible (without an NMI being received in the middle of
         * this) - so disable NMIs and initialize the device:
         */
-       acpi_nmi_disable();
        status = acpi_ns_evaluate(info);
-       acpi_nmi_enable();
 
        if (ACPI_SUCCESS(status)) {
                walk_info->num_INI++;
index 9fb9d5ac939db1c071c4cdcaa7fba49364236e25..95649d373071ac93d14bbbb478db1ab4a8cdb801 100644 (file)
@@ -130,8 +130,6 @@ struct acpi_battery {
        unsigned long flags;
 };
 
-static int acpi_battery_update(struct acpi_battery *battery);
-
 #define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
 
 inline int acpi_battery_present(struct acpi_battery *battery)
@@ -186,9 +184,6 @@ static int acpi_battery_get_property(struct power_supply *psy,
        int ret = 0;
        struct acpi_battery *battery = to_acpi_battery(psy);
 
-       if (acpi_battery_update(battery))
-               return -ENODEV;
-
        if (acpi_battery_present(battery)) {
                /* run battery update only if it is present */
                acpi_battery_get_state(battery);
index 5718566e00f9b27573db228ba74559570ba48f89..d9926afec110997b618b70062d50450847d9d1ff 100644 (file)
@@ -275,13 +275,23 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 int __init acpi_numa_init(void)
 {
        int ret = 0;
+       int nr_cpu_entries = nr_cpu_ids;
+
+#ifdef CONFIG_X86
+       /*
+        * Do not limit the entry count to the cpu number coming from NR_CPUS
+        * or nr_cpus=; the SRAT cpu entries may be ordered differently from
+        * those in the MADT. So go over all cpu entries in the SRAT to get
+        * the apicid-to-node mapping.
+        */
+       nr_cpu_entries = MAX_LOCAL_APIC;
+#endif
 
        /* SRAT: Static Resource Affinity Table */
        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
                acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-                                    acpi_parse_x2apic_affinity, nr_cpu_ids);
+                                    acpi_parse_x2apic_affinity, nr_cpu_entries);
                acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-                                    acpi_parse_processor_affinity, nr_cpu_ids);
+                                    acpi_parse_processor_affinity, nr_cpu_entries);
                ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                                            acpi_parse_memory_affinity,
                                            NR_NODE_MEMBLKS);
index 2b6c21d86b9885571b41679586636fff3b6c53bb..29ef505c487b92efe1b12e79c2231aa85c0298ce 100644 (file)
@@ -705,54 +705,85 @@ static int acpi_bus_get_perf_flags(struct acpi_device *device)
 }
 
 static acpi_status
-acpi_bus_extract_wakeup_device_power_package(struct acpi_device *device,
-                                            union acpi_object *package)
+acpi_bus_extract_wakeup_device_power_package(acpi_handle handle,
+                                            struct acpi_device_wakeup *wakeup)
 {
-       int i = 0;
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       union acpi_object *package = NULL;
        union acpi_object *element = NULL;
+       acpi_status status;
+       int i = 0;
 
-       if (!device || !package || (package->package.count < 2))
+       if (!wakeup)
                return AE_BAD_PARAMETER;
 
+       /* _PRW */
+       status = acpi_evaluate_object(handle, "_PRW", NULL, &buffer);
+       if (ACPI_FAILURE(status)) {
+               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
+               return status;
+       }
+
+       package = (union acpi_object *)buffer.pointer;
+
+       if (!package || (package->package.count < 2)) {
+               status = AE_BAD_DATA;
+               goto out;
+       }
+
        element = &(package->package.elements[0]);
-       if (!element)
-               return AE_BAD_PARAMETER;
+       if (!element) {
+               status = AE_BAD_DATA;
+               goto out;
+       }
        if (element->type == ACPI_TYPE_PACKAGE) {
                if ((element->package.count < 2) ||
                    (element->package.elements[0].type !=
                     ACPI_TYPE_LOCAL_REFERENCE)
-                   || (element->package.elements[1].type != ACPI_TYPE_INTEGER))
-                       return AE_BAD_DATA;
-               device->wakeup.gpe_device =
+                   || (element->package.elements[1].type != ACPI_TYPE_INTEGER)) {
+                       status = AE_BAD_DATA;
+                       goto out;
+               }
+               wakeup->gpe_device =
                    element->package.elements[0].reference.handle;
-               device->wakeup.gpe_number =
+               wakeup->gpe_number =
                    (u32) element->package.elements[1].integer.value;
        } else if (element->type == ACPI_TYPE_INTEGER) {
-               device->wakeup.gpe_number = element->integer.value;
-       } else
-               return AE_BAD_DATA;
+               wakeup->gpe_device = NULL;
+               wakeup->gpe_number = element->integer.value;
+       } else {
+               status = AE_BAD_DATA;
+               goto out;
+       }
 
        element = &(package->package.elements[1]);
        if (element->type != ACPI_TYPE_INTEGER) {
-               return AE_BAD_DATA;
+               status = AE_BAD_DATA;
+               goto out;
        }
-       device->wakeup.sleep_state = element->integer.value;
+       wakeup->sleep_state = element->integer.value;
 
        if ((package->package.count - 2) > ACPI_MAX_HANDLES) {
-               return AE_NO_MEMORY;
+               status = AE_NO_MEMORY;
+               goto out;
        }
-       device->wakeup.resources.count = package->package.count - 2;
-       for (i = 0; i < device->wakeup.resources.count; i++) {
+       wakeup->resources.count = package->package.count - 2;
+       for (i = 0; i < wakeup->resources.count; i++) {
                element = &(package->package.elements[i + 2]);
-               if (element->type != ACPI_TYPE_LOCAL_REFERENCE)
-                       return AE_BAD_DATA;
+               if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
+                       status = AE_BAD_DATA;
+                       goto out;
+               }
 
-               device->wakeup.resources.handles[i] = element->reference.handle;
+               wakeup->resources.handles[i] = element->reference.handle;
        }
 
-       acpi_gpe_can_wake(device->wakeup.gpe_device, device->wakeup.gpe_number);
+       acpi_gpe_can_wake(wakeup->gpe_device, wakeup->gpe_number);
 
-       return AE_OK;
+ out:
+       kfree(buffer.pointer);
+
+       return status;
 }
 
 static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
@@ -787,26 +818,15 @@ static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
 static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
 {
        acpi_status status = 0;
-       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-       union acpi_object *package = NULL;
        int psw_error;
 
-       /* _PRW */
-       status = acpi_evaluate_object(device->handle, "_PRW", NULL, &buffer);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
-               goto end;
-       }
-
-       package = (union acpi_object *)buffer.pointer;
-       status = acpi_bus_extract_wakeup_device_power_package(device, package);
+       status = acpi_bus_extract_wakeup_device_power_package(device->handle,
+                                                             &device->wakeup);
        if (ACPI_FAILURE(status)) {
                ACPI_EXCEPTION((AE_INFO, status, "Extracting _PRW package"));
                goto end;
        }
 
-       kfree(buffer.pointer);
-
        device->wakeup.flags.valid = 1;
        device->wakeup.prepare_count = 0;
        acpi_bus_set_run_wake_flags(device);
@@ -1351,6 +1371,7 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
        struct acpi_bus_ops *ops = context;
        int type;
        unsigned long long sta;
+       struct acpi_device_wakeup wakeup;
        struct acpi_device *device;
        acpi_status status;
        int result;
@@ -1360,8 +1381,10 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
                return AE_OK;
 
        if (!(sta & ACPI_STA_DEVICE_PRESENT) &&
-           !(sta & ACPI_STA_DEVICE_FUNCTIONING))
+           !(sta & ACPI_STA_DEVICE_FUNCTIONING)) {
+               acpi_bus_extract_wakeup_device_power_package(handle, &wakeup);
                return AE_CTRL_DEPTH;
+       }
 
        /*
         * We may already have an acpi_device from a previous enumeration.  If
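
The refactored helper above now owns the _PRW buffer it allocates and unwinds every error through a single out: label that frees it. A minimal standalone sketch of that allocate/validate/goto-out pattern, using hypothetical names rather than the ACPICA API:

#include <stdlib.h>
#include <string.h>

struct wakeup_info {
        int gpe_number;
        int sleep_state;
};

/* Allocate, validate, parse; free the buffer on every exit path. */
static int extract_wakeup(const char *src, struct wakeup_info *wakeup)
{
        char *buf;
        int status = 0;

        if (!wakeup)
                return -1;              /* nothing allocated yet: plain return */

        buf = strdup(src);              /* stands in for acpi_evaluate_object() */
        if (!buf)
                return -1;

        if (strlen(buf) < 2) {          /* stands in for package validation */
                status = -2;
                goto out;
        }
        wakeup->gpe_number = buf[0];
        wakeup->sleep_state = buf[1];

out:
        free(buf);                      /* single cleanup point for all paths */
        return status;
}
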
index 11ec911016c6ab3e81cefe7e202c4cc4c95762f0..36e2319264bd33d7918979105a4f641da36d3dcd 100644 (file)
@@ -128,16 +128,6 @@ config PDC_ADMA
 
          If unsure, say N.
 
-config PATA_MPC52xx
-       tristate "Freescale MPC52xx SoC internal IDE"
-       depends on PPC_MPC52xx && PPC_BESTCOMM
-       select PPC_BESTCOMM_ATA
-       help
-         This option enables support for the integrated IDE
-         controller of the Freescale MPC52xx SoC.
-
-         If unsure, say N.
-
 config PATA_OCTEON_CF
        tristate "OCTEON Boot Bus Compact Flash support"
        depends on CPU_CAVIUM_OCTEON
@@ -366,7 +356,7 @@ config PATA_CS5535
 
 config PATA_CS5536
        tristate "CS5536 PATA support"
-       depends on PCI && X86 && !X86_64
+       depends on PCI
        help
          This option enables support for the AMD CS5536
          companion chip used with the Geode LX processor family.
@@ -491,6 +481,16 @@ config PATA_MARVELL
 
          If unsure, say N.
 
+config PATA_MPC52xx
+       tristate "Freescale MPC52xx SoC internal IDE"
+       depends on PPC_MPC52xx && PPC_BESTCOMM
+       select PPC_BESTCOMM_ATA
+       help
+         This option enables support for the integrated IDE
+         controller of the Freescale MPC52xx SoC.
+
+         If unsure, say N.
+
 config PATA_NETCELL
        tristate "NETCELL Revolution RAID support"
        depends on PCI
index c501af5b12b959209bde413d9b8bbe35f7be5068..2b67c900a459865c694e0cf012ebcacbfd8f76bd 100644 (file)
@@ -11,7 +11,6 @@ obj-$(CONFIG_SATA_DWC)                += sata_dwc_460ex.o
 
 # SFF w/ custom DMA
 obj-$(CONFIG_PDC_ADMA)         += pdc_adma.o
-obj-$(CONFIG_PATA_MPC52xx)     += pata_mpc52xx.o
 obj-$(CONFIG_PATA_OCTEON_CF)   += pata_octeon_cf.o
 obj-$(CONFIG_SATA_QSTOR)       += sata_qstor.o
 obj-$(CONFIG_SATA_SX4)         += sata_sx4.o
@@ -52,6 +51,7 @@ obj-$(CONFIG_PATA_IT821X)     += pata_it821x.o
 obj-$(CONFIG_PATA_JMICRON)     += pata_jmicron.o
 obj-$(CONFIG_PATA_MACIO)       += pata_macio.o
 obj-$(CONFIG_PATA_MARVELL)     += pata_marvell.o
+obj-$(CONFIG_PATA_MPC52xx)     += pata_mpc52xx.o
 obj-$(CONFIG_PATA_NETCELL)     += pata_netcell.o
 obj-$(CONFIG_PATA_NINJA32)     += pata_ninja32.o
 obj-$(CONFIG_PATA_NS87415)     += pata_ns87415.o
index 7f77c67d267ca454f63b5501ab1bb82d5fe9f16d..f23d6d46b95b1bf7f4f19fb657f86cadc0ccd6b7 100644 (file)
@@ -4807,9 +4807,6 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc)
 {
        struct ata_device *dev = qc->dev;
 
-       if (ata_tag_internal(qc->tag))
-               return;
-
        if (ata_is_nodata(qc->tf.protocol))
                return;
 
@@ -4858,14 +4855,23 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
                if (unlikely(qc->err_mask))
                        qc->flags |= ATA_QCFLAG_FAILED;
 
-               if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
-                       /* always fill result TF for failed qc */
+               /*
+                * Finish internal commands without any further processing
+                * and always with the result TF filled.
+                */
+               if (unlikely(ata_tag_internal(qc->tag))) {
                        fill_result_tf(qc);
+                       __ata_qc_complete(qc);
+                       return;
+               }
 
-                       if (!ata_tag_internal(qc->tag))
-                               ata_qc_schedule_eh(qc);
-                       else
-                               __ata_qc_complete(qc);
+               /*
+                * Non-internal qc has failed.  Fill the result TF and
+                * summon EH.
+                */
+               if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
+                       fill_result_tf(qc);
+                       ata_qc_schedule_eh(qc);
                        return;
                }
 
index 5e590504f3aa15c6c3c73450327a0494c264d20f..17a637877d0311abb7c95c7cd6e2359be4a2c70b 100644 (file)
@@ -3275,6 +3275,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
        struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
        struct ata_eh_context *ehc = &link->eh_context;
        struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
+       enum ata_lpm_policy old_policy = link->lpm_policy;
        unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
        unsigned int err_mask;
        int rc;
@@ -3338,6 +3339,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                goto fail;
        }
 
+       /*
+        * Low level driver acked the transition.  Issue DIPM command
+        * with the new policy set.
+        */
+       link->lpm_policy = policy;
+       if (ap && ap->slave_link)
+               ap->slave_link->lpm_policy = policy;
+
        /* host config updated, enable DIPM if transitioning to MIN_POWER */
        ata_for_each_dev(dev, link, ENABLED) {
                if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) {
@@ -3353,12 +3362,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                }
        }
 
-       link->lpm_policy = policy;
-       if (ap && ap->slave_link)
-               ap->slave_link->lpm_policy = policy;
        return 0;
 
 fail:
+       /* restore the old policy */
+       link->lpm_policy = old_policy;
+       if (ap && ap->slave_link)
+               ap->slave_link->lpm_policy = old_policy;
+
        /* if no device or only one more chance is left, disable LPM */
        if (!dev || ehc->tries[dev->devno] <= 2) {
                ata_link_printk(link, KERN_WARNING,
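
Saving old_policy and restoring it on failure, while committing the new value before the hardware is programmed, is the classic commit-early/roll-back pattern. In outline, with hypothetical names:

#include <stdio.h>

enum policy { POLICY_MAX_PERF, POLICY_MIN_POWER };

static enum policy cur_policy = POLICY_MAX_PERF;

static int program_hw(enum policy p)
{
        return p == POLICY_MIN_POWER;   /* pretend the hw rejects MIN_POWER */
}

static int set_policy(enum policy new_policy)
{
        enum policy old_policy = cur_policy;

        /* Commit early: commands issued below must see the new policy. */
        cur_policy = new_policy;

        if (program_hw(new_policy))
                goto fail;
        return 0;

fail:
        /* Restore, so cached state matches what the hardware actually runs. */
        cur_policy = old_policy;
        return -1;
}

int main(void)
{
        set_policy(POLICY_MIN_POWER);
        printf("policy=%d\n", cur_policy);      /* still POLICY_MAX_PERF */
        return 0;
}
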
index d05387d1e14be5aedf2f23dae45e86b7ed989276..484697fef3867dce2c1926f0ad50bb53de3b40e7 100644 (file)
@@ -1532,11 +1532,10 @@ static unsigned int __ata_sff_port_intr(struct ata_port *ap,
                if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
                        return ata_sff_idle_irq(ap);
                break;
-       case HSM_ST:
-       case HSM_ST_LAST:
-               break;
-       default:
+       case HSM_ST_IDLE:
                return ata_sff_idle_irq(ap);
+       default:
+               break;
        }
 
        /* check main status, clearing INTRQ if needed */
index 21ee23f89e88b153ee8e35af5d75c61f83f92496..628c8fae5937183f24b9a607c17f0765609a3a5e 100644 (file)
 #include <linux/delay.h>
 #include <linux/libata.h>
 #include <scsi/scsi_host.h>
+
+#ifdef CONFIG_X86_32
 #include <asm/msr.h>
+static int use_msr;
+module_param_named(msr, use_msr, int, 0644);
+MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
+#else
+#undef rdmsr   /* avoid accidental MSR usage on, e.g., x86-64 */
+#undef wrmsr
+#define rdmsr(x, y, z) do { } while (0)
+#define wrmsr(x, y, z) do { } while (0)
+#define use_msr 0
+#endif
 
 #define DRV_NAME       "pata_cs5536"
-#define DRV_VERSION    "0.0.7"
+#define DRV_VERSION    "0.0.8"
 
 enum {
        CFG                     = 0,
@@ -75,8 +87,6 @@ enum {
        IDE_ETC_NODMA           = 0x03,
 };
 
-static int use_msr;
-
 static const u32 msr_reg[4] = {
        MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC,
 };
@@ -88,7 +98,7 @@ static const u8 pci_reg[4] = {
 static inline int cs5536_read(struct pci_dev *pdev, int reg, u32 *val)
 {
        if (unlikely(use_msr)) {
-               u32 dummy;
+               u32 dummy __maybe_unused;
 
                rdmsr(msr_reg[reg], *val, dummy);
                return 0;
@@ -294,8 +304,6 @@ MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
 MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, cs5536);
 MODULE_VERSION(DRV_VERSION);
-module_param_named(msr, use_msr, int, 0644);
-MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
 
 module_init(cs5536_init);
 module_exit(cs5536_exit);
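
The CONFIG_X86_32 guard above stubs rdmsr/wrmsr into no-ops and turns use_msr into a compile-time 0, so the call sites compile unchanged on every architecture and the dead branches fold away. The same guard pattern in a small standalone form (HAVE_FAST_PATH and the helpers are made-up names):

#include <stdio.h>

#ifdef HAVE_FAST_PATH                   /* stands in for CONFIG_X86_32 */
static int use_fast = 1;
static void fast_read(int reg, int *val) { *val = reg * 2; }
#else
/* No fast path on this build: stub it out. "use_fast" becomes a
 * compile-time 0, so the branch below is dead code the compiler drops. */
#define use_fast 0
#define fast_read(reg, val) do { } while (0)
#endif

static int read_reg(int reg)
{
        int val = reg;                  /* portable path */

        if (use_fast)
                fast_read(reg, &val);
        return val;
}

int main(void)
{
        printf("%d\n", read_reg(21));
        return 0;
}
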
index 2b464b631f22697ac5f177d371b94d5646683d8f..0b0625054a87a5697c8e4745bfd49eb2d1bf45c0 100644 (file)
@@ -392,7 +392,10 @@ static int atmtcp_attach(struct atm_vcc *vcc,int itf)
                        atm_dev_put(dev);
                        return -EMEDIUMTYPE;
                }
-               if (PRIV(dev)->vcc) return -EBUSY;
+               if (PRIV(dev)->vcc) {
+                       atm_dev_put(dev);
+                       return -EBUSY;
+               }
        }
        else {
                int error;
index 720148294e648473a1a82cc0c68f50ac1fd923c3..3c6cabcb7d84b0428cdeae91b3f9a98f2bc85715 100644 (file)
@@ -311,8 +311,10 @@ static void hci_uart_tty_close(struct tty_struct *tty)
 
                if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) {
                        hu->proto->close(hu);
-                       hci_unregister_dev(hdev);
-                       hci_free_dev(hdev);
+                       if (hdev) {
+                               hci_unregister_dev(hdev);
+                               hci_free_dev(hdev);
+                       }
                }
        }
 }
index 42396df555567660d597ac524631e9020edfbea3..9252e85706ef2ce54728a0c8bc366c80c793033c 100644 (file)
@@ -38,7 +38,7 @@ static int agp_bridges_found;
 
 static void amd64_tlbflush(struct agp_memory *temp)
 {
-       k8_flush_garts();
+       amd_flush_garts();
 }
 
 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
        u32 temp;
        struct aper_size_info_32 *values;
 
-       dev = k8_northbridges.nb_misc[0];
+       dev = node_to_amd_nb(0)->misc;
        if (dev==NULL)
                return 0;
 
@@ -181,16 +181,15 @@ static int amd_8151_configure(void)
        unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return 0;
 
        /* Configure AGP regs in each x86-64 host bridge. */
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                agp_bridge->gart_bus_addr =
-                               amd64_configure(k8_northbridges.nb_misc[i],
-                                               gatt_bus);
+                       amd64_configure(node_to_amd_nb(i)->misc, gatt_bus);
        }
-       k8_flush_garts();
+       amd_flush_garts();
        return 0;
 }
 
@@ -200,11 +199,11 @@ static void amd64_cleanup(void)
        u32 tmp;
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
                /* disable gart translation */
                pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
                tmp &= ~GARTEN;
@@ -331,15 +330,15 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
 {
        int i;
 
-       if (cache_k8_northbridges() < 0)
+       if (amd_cache_northbridges() < 0)
                return -ENODEV;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return -ENODEV;
 
        i = 0;
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
                if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
                        dev_err(&dev->dev, "no usable aperture found\n");
 #ifdef __x86_64__
@@ -416,7 +415,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
        }
 
        /* shadow x86-64 registers into ULi registers */
-       pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+       pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
                               &httfea);
 
        /* if x86-64 aperture base is beyond 4G, exit here */
@@ -484,7 +483,7 @@ static int nforce3_agp_init(struct pci_dev *pdev)
        pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
 
        /* shadow x86-64 registers into NVIDIA registers */
-       pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+       pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
                               &apbase);
 
        /* if x86-64 aperture base is beyond 4G, exit here */
@@ -778,7 +777,7 @@ int __init agp_amd64_init(void)
                }
 
                /* First check that we have at least one AMD64 NB */
-               if (!pci_dev_present(k8_nb_ids))
+               if (!pci_dev_present(amd_nb_misc_ids))
                        return -ENODEV;
 
                /* Look for any AGP bridge */
index 16a2847b7cdbfeee43c9dcd212915a2f6b500711..29ac6d499fa6a69221b51ec52ca934c5d41e0912 100644 (file)
@@ -1192,12 +1192,19 @@ static void i9xx_chipset_flush(void)
                writel(1, intel_private.i9xx_flush_page);
 }
 
-static void i965_write_entry(dma_addr_t addr, unsigned int entry,
+static void i965_write_entry(dma_addr_t addr,
+                            unsigned int entry,
                             unsigned int flags)
 {
+       u32 pte_flags;
+
+       pte_flags = I810_PTE_VALID;
+       if (flags == AGP_USER_CACHED_MEMORY)
+               pte_flags |= I830_PTE_SYSTEM_CACHED;
+
        /* Shift high bits down */
        addr |= (addr >> 28) & 0xf0;
-       writel(addr | I810_PTE_VALID, intel_private.gtt + entry);
+       writel(addr | pte_flags, intel_private.gtt + entry);
 }
 
 static bool gen6_check_flags(unsigned int flags)
index 73dcb0ee41fdaebcbefee2b0ddf705bd5a26b1e9..d3d63be2cd37efc15c5b261ab10492ba8b01caf6 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/ramoops.h>
 
 #define RAMOOPS_KERNMSG_HDR "===="
-#define RAMOOPS_HEADER_SIZE   (5 + sizeof(struct timeval))
 
 #define RECORD_SIZE 4096
 
@@ -65,8 +64,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
                        struct ramoops_context, dump);
        unsigned long s1_start, s2_start;
        unsigned long l1_cpy, l2_cpy;
-       int res;
-       char *buf;
+       int res, hdr_size;
+       char *buf, *buf_orig;
        struct timeval timestamp;
 
        /* Only dump oopses if dump_oops is set */
@@ -74,6 +73,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
                return;
 
        buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE));
+       buf_orig = buf;
+
        memset(buf, '\0', RECORD_SIZE);
        res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR);
        buf += res;
@@ -81,8 +82,9 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
        res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec);
        buf += res;
 
-       l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE));
-       l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE) - l2_cpy);
+       hdr_size = buf - buf_orig;
+       l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - hdr_size));
+       l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - hdr_size) - l2_cpy);
 
        s2_start = l2 - l2_cpy;
        s1_start = l1 - l1_cpy;
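
Replacing the fixed RAMOOPS_HEADER_SIZE macro with buf - buf_orig measures the header after formatting it, so the size can never drift out of sync with what sprintf actually wrote. The idea in a standalone sketch (names are illustrative):

#include <stdio.h>
#include <string.h>
#include <time.h>

#define RECORD_SIZE 4096

static void build_record(char *buf, const char *msg)
{
        char *start = buf;
        int hdr_size;

        buf += sprintf(buf, "====");
        buf += sprintf(buf, "%ld.0\n", (long)time(NULL));

        hdr_size = buf - start;         /* measured, not predicted */
        snprintf(buf, RECORD_SIZE - hdr_size, "%s", msg);
}

int main(void)
{
        char rec[RECORD_SIZE];

        build_record(rec, "oops text");
        printf("%s\n", rec);
        return 0;
}
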
index c63a438237444a0e35fa05bdef9d0d232839c463..1109f6848a43940b8e8ed738f891f6560f17f7a8 100644 (file)
@@ -355,6 +355,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
                dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
+               trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
index a507108433785f8432b3d15504c3a036b461c1a0..08d5f05378d9efb1df0fe055240e8e8ed9e7a90b 100644 (file)
@@ -107,6 +107,7 @@ static void cpuidle_idle_call(void)
        if (cpuidle_curr_governor->reflect)
                cpuidle_curr_governor->reflect(dev);
        trace_power_end(smp_processor_id());
+       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /**
index 411d5bf50fc43cab437dff34d3d9f25dd9928fe0..a25f5f61e0e00becc22b5b5b4d8ce1e4ce6e31b8 100644 (file)
@@ -449,7 +449,7 @@ mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
 static void mv_xor_tasklet(unsigned long data)
 {
        struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
-       __mv_xor_slot_cleanup(chan);
+       mv_xor_slot_cleanup(chan);
 }
 
 static struct mv_xor_desc_slot *
index eca9ba193e94a914aa64740f0b4262b8f7c99b81..df211181fca41627cb0bdea2a089cd9d26f7faf4 100644 (file)
@@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void)
 
        opstate_init();
 
-       if (cache_k8_northbridges() < 0)
+       if (amd_cache_northbridges() < 0)
                goto err_ret;
 
        msrs = msrs_alloc();
@@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void)
         * to finish initialization of the MC instances.
         */
        err = -ENODEV;
-       for (nb = 0; nb < k8_northbridges.num; nb++) {
+       for (nb = 0; nb < amd_nb_num(); nb++) {
                if (!pvt_lookup[nb])
                        continue;
 
index 599f6c9e0fbf18661d7607407d11b73cd3cda7ff..d3e55a0ae92be02e5f46ea7d6f7f82c87e75c807 100644 (file)
@@ -56,15 +56,26 @@ static struct cs5535_gpio_chip {
  * registers, see include/linux/cs5535.h.
  */
 
-static void errata_outl(u32 val, unsigned long addr)
+static void errata_outl(struct cs5535_gpio_chip *chip, u32 val,
+               unsigned int reg)
 {
+       unsigned long addr = chip->base + 0x80 + reg;
+
        /*
         * According to the CS5536 errata (#36), after suspend
         * a write to the high bank GPIO register will clear all
         * non-selected bits; the recommended workaround is a
         * read-modify-write operation.
+        *
+        * Don't apply this workaround to the edge status GPIOs, as writing
+        * to their lower bits will clear them.
         */
-       val |= inl(addr);
+       if (reg != GPIO_POSITIVE_EDGE_STS && reg != GPIO_NEGATIVE_EDGE_STS) {
+               if (val & 0xffff)
+                       val |= (inl(addr) & 0xffff); /* ignore the high bits */
+               else
+                       val |= (inl(addr) ^ (val >> 16));
+       }
        outl(val, addr);
 }
 
@@ -76,7 +87,7 @@ static void __cs5535_gpio_set(struct cs5535_gpio_chip *chip, unsigned offset,
                outl(1 << offset, chip->base + reg);
        else
                /* high bank register */
-               errata_outl(1 << (offset - 16), chip->base + 0x80 + reg);
+               errata_outl(chip, 1 << (offset - 16), reg);
 }
 
 void cs5535_gpio_set(unsigned offset, unsigned int reg)
@@ -98,7 +109,7 @@ static void __cs5535_gpio_clear(struct cs5535_gpio_chip *chip, unsigned offset,
                outl(1 << (offset + 16), chip->base + reg);
        else
                /* high bank register */
-               errata_outl(1 << offset, chip->base + 0x80 + reg);
+               errata_outl(chip, 1 << offset, reg);
 }
 
 void cs5535_gpio_clear(unsigned offset, unsigned int reg)
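
The workaround reads the register back and merges before writing, so a write cannot clear unrelated bits; the exception is write-one-to-clear status registers, where the read-back itself would wipe pending events. A simplified sketch of that read-modify-write rule (the real code above additionally splits the set/clear half-words):

#include <stdio.h>

static unsigned int mmio[2];            /* fake register file */

static unsigned int reg_inl(int reg)              { return mmio[reg]; }
static void reg_outl(unsigned int val, int reg)   { mmio[reg] = val; }

#define REG_CTL 0                       /* plain control register */
#define REG_STS 1                       /* write-1-to-clear status register */

static void errata_outl(unsigned int val, int reg)
{
        if (reg != REG_STS)             /* never read-modify-write W1C regs */
                val |= reg_inl(reg);
        reg_outl(val, reg);
}

int main(void)
{
        reg_outl(0x5, REG_CTL);
        errata_outl(0x2, REG_CTL);      /* bits 0 and 2 survive the write */
        printf("ctl=%#x\n", reg_inl(REG_CTL));
        return 0;
}
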
index 21da9c19a0cba82e1a8aba6aa52056953efb4f82..649550e2cae99947027df5f2887e77016f554b54 100644 (file)
@@ -1281,6 +1281,9 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
                err = gpio_direction_output(gpio,
                                (flags & GPIOF_INIT_HIGH) ? 1 : 0);
 
+       if (err)
+               gpio_free(gpio);
+
        return err;
 }
 EXPORT_SYMBOL_GPL(gpio_request_one);
index 2762698e0204adc99699839137680bafc7fcc0c7..897e0577e65e0fd425f18edb00512ee414566a64 100644 (file)
@@ -135,7 +135,7 @@ static int __devinit rdc321x_gpio_probe(struct platform_device *pdev)
        struct rdc321x_gpio *rdc321x_gpio_dev;
        struct rdc321x_gpio_pdata *pdata;
 
-       pdata = pdev->dev.platform_data;
+       pdata = platform_get_drvdata(pdev);
        if (!pdata) {
                dev_err(&pdev->dev, "no platform data supplied\n");
                return -ENODEV;
index bede10a0340700b69717caf1da8e81ee0546ab00..2d4e17a004dbb2f09bae780c56f36138c4b1beca 100644 (file)
@@ -241,7 +241,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
        }
 
        list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-               if (encoder->crtc && !drm_helper_encoder_in_use(encoder)) {
+               if (!drm_helper_encoder_in_use(encoder)) {
                        drm_encoder_disable(encoder);
                        /* disconnect encoder from any connector */
                        encoder->crtc = NULL;
@@ -874,7 +874,10 @@ static void output_poll_execute(struct work_struct *work)
                        continue;
 
                connector->status = connector->funcs->detect(connector, false);
-               DRM_DEBUG_KMS("connector status updated to %d\n", connector->status);
+               DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
+                             connector->base.id,
+                             drm_get_connector_name(connector),
+                             old_status, connector->status);
                if (old_status != connector->status)
                        changed = true;
        }
index af70337567ce35a0167ffb193d8b1ffae769fedd..d3e8c540f778d01ef97437c3643acbd690716546 100644 (file)
@@ -242,7 +242,7 @@ fail:
 
 static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo)
 {
-       return connector_status_unknown;
+       return connector_status_connected;
 }
 
 static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo,
index e6800819bca846f6a3fd102ef37e2632858247f8..cb900dc83d950e29f99144e0d39c38102c55ce5b 100644 (file)
@@ -34,6 +34,7 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "../../../platform/x86/intel_ips.h"
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
 #include <linux/acpi.h>
@@ -1870,6 +1871,26 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+       void (*link)(void);
+
+       link = symbol_get(ips_link_to_i915_driver);
+       if (link) {
+               link();
+               symbol_put(ips_link_to_i915_driver);
+       }
+}
+
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -2075,6 +2096,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        dev_priv->mchdev_lock = &mchdev_lock;
        spin_unlock(&mchdev_lock);
 
+       ips_ping_for_i915_load();
+
        return 0;
 
 out_workqueue_free:
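
symbol_get()/symbol_put() resolve the intel_ips export at run time, so neither module needs a link-time dependency on the other. A rough userspace analogue of the same dance uses dlsym(); "peer_entry_point" is a hypothetical name, and the in-kernel mechanism differs in detail:

#define _GNU_SOURCE                     /* for RTLD_DEFAULT */
#include <dlfcn.h>

/* Look up a peer's entry point at run time; if the peer is not loaded,
 * silently do nothing. Neither side links against the other. */
static void ping_peer_if_loaded(void)
{
        void (*link)(void);

        link = (void (*)(void))dlsym(RTLD_DEFAULT, "peer_entry_point");
        if (link)
                link();                 /* peer present: notify it */
}

int main(void)
{
        ping_peer_if_loaded();
        return 0;
}
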
index 878fc766a12cc05f6b30c90d53d65a52c6023727..cb8f434292793451225eefdbea071aefe7408e79 100644 (file)
 # define MARIUNIT_CLOCK_GATE_DISABLE           (1 << 18)
 # define SVSMUNIT_CLOCK_GATE_DISABLE           (1 << 1)
 
+#define PCH_3DCGDIS1           0x46024
+# define VFMUNIT_CLOCK_GATE_DISABLE            (1 << 11)
+
 #define FDI_PLL_FREQ_CTL        0x46030
 #define  FDI_PLL_FREQ_CHANGE_REQUEST    (1<<24)
 #define  FDI_PLL_FREQ_LOCK_LIMIT_MASK   0xfff00
 #define ILK_DISPLAY_CHICKEN2   0x42004
 #define  ILK_DPARB_GATE        (1<<22)
 #define  ILK_VSDPFD_FULL       (1<<21)
+#define ILK_DISPLAY_CHICKEN_FUSES      0x42014
+#define  ILK_INTERNAL_GRAPHICS_DISABLE (1<<31)
+#define  ILK_INTERNAL_DISPLAY_DISABLE  (1<<30)
+#define  ILK_DISPLAY_DEBUG_DISABLE     (1<<29)
+#define  ILK_HDCP_DISABLE              (1<<25)
+#define  ILK_eDP_A_DISABLE             (1<<24)
+#define  ILK_DESKTOP                   (1<<23)
 #define ILK_DSPCLK_GATE                0x42020
 #define  ILK_DPARB_CLK_GATE    (1<<5)
 /* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */
index d9b7092439ef59ab19acc9546a039ff60f0bb43c..fca523288acad035b9f3d130fe32e9c3c7719989 100644 (file)
@@ -5379,6 +5379,23 @@ static int intel_encoder_clones(struct drm_device *dev, int type_mask)
        return index_mask;
 }
 
+static bool has_edp_a(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!IS_MOBILE(dev))
+               return false;
+
+       if ((I915_READ(DP_A) & DP_DETECTED) == 0)
+               return false;
+
+       if (IS_GEN5(dev) &&
+           (I915_READ(ILK_DISPLAY_CHICKEN_FUSES) & ILK_eDP_A_DISABLE))
+               return false;
+
+       return true;
+}
+
 static void intel_setup_outputs(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5396,7 +5413,7 @@ static void intel_setup_outputs(struct drm_device *dev)
        if (HAS_PCH_SPLIT(dev)) {
                dpd_is_edp = intel_dpd_is_edp(dev);
 
-               if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED))
+               if (has_edp_a(dev))
                        intel_dp_init(dev, DP_A);
 
                if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED))
@@ -5825,6 +5842,8 @@ void intel_init_clock_gating(struct drm_device *dev)
                        I915_WRITE(PCH_3DCGDIS0,
                                   MARIUNIT_CLOCK_GATE_DISABLE |
                                   SVSMUNIT_CLOCK_GATE_DISABLE);
+                       I915_WRITE(PCH_3DCGDIS1,
+                                  VFMUNIT_CLOCK_GATE_DISABLE);
                }
 
                I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
index df648cb4c29641cec581307b1c40a46d812fd58b..864417cffe9a7c3a45ea52716aae46da97e1b7c1 100644 (file)
@@ -479,6 +479,7 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
        uint16_t address = algo_data->address;
        uint8_t msg[5];
        uint8_t reply[2];
+       unsigned retry;
        int msg_bytes;
        int reply_bytes;
        int ret;
@@ -513,14 +514,33 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
                break;
        }
 
-       for (;;) {
-         ret = intel_dp_aux_ch(intel_dp,
-                               msg, msg_bytes,
-                               reply, reply_bytes);
+       for (retry = 0; retry < 5; retry++) {
+               ret = intel_dp_aux_ch(intel_dp,
+                                     msg, msg_bytes,
+                                     reply, reply_bytes);
                if (ret < 0) {
                        DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
                        return ret;
                }
+
+               switch (reply[0] & AUX_NATIVE_REPLY_MASK) {
+               case AUX_NATIVE_REPLY_ACK:
+                       /* I2C-over-AUX Reply field is only valid
+                        * when paired with AUX ACK.
+                        */
+                       break;
+               case AUX_NATIVE_REPLY_NACK:
+                       DRM_DEBUG_KMS("aux_ch native nack\n");
+                       return -EREMOTEIO;
+               case AUX_NATIVE_REPLY_DEFER:
+                       udelay(100);
+                       continue;
+               default:
+                       DRM_ERROR("aux_ch invalid native reply 0x%02x\n",
+                                 reply[0]);
+                       return -EREMOTEIO;
+               }
+
                switch (reply[0] & AUX_I2C_REPLY_MASK) {
                case AUX_I2C_REPLY_ACK:
                        if (mode == MODE_I2C_READ) {
@@ -528,17 +548,20 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
                        }
                        return reply_bytes - 1;
                case AUX_I2C_REPLY_NACK:
-                       DRM_DEBUG_KMS("aux_ch nack\n");
+                       DRM_DEBUG_KMS("aux_i2c nack\n");
                        return -EREMOTEIO;
                case AUX_I2C_REPLY_DEFER:
-                       DRM_DEBUG_KMS("aux_ch defer\n");
+                       DRM_DEBUG_KMS("aux_i2c defer\n");
                        udelay(100);
                        break;
                default:
-                       DRM_ERROR("aux_ch invalid reply 0x%02x\n", reply[0]);
+                       DRM_ERROR("aux_i2c invalid reply 0x%02x\n", reply[0]);
                        return -EREMOTEIO;
                }
        }
+
+       DRM_ERROR("too many retries, giving up\n");
+       return -EREMOTEIO;
 }
 
 static int
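
The new loop bounds the retries and separates the three native-reply outcomes: ack (proceed to the I2C reply), nack (hard failure), and defer (back off and retry). A sink that defers forever now yields an error instead of an endless loop. The control flow in miniature, with made-up names:

#include <stdio.h>

enum reply { REPLY_ACK, REPLY_NACK, REPLY_DEFER };

static enum reply busy_then_ok(void)    /* sample peer: defers twice, then acks */
{
        static int calls;
        return ++calls < 3 ? REPLY_DEFER : REPLY_ACK;
}

static int do_transfer(enum reply (*xfer)(void))
{
        unsigned int retry;

        for (retry = 0; retry < 5; retry++) {
                switch (xfer()) {
                case REPLY_ACK:
                        return 0;       /* done */
                case REPLY_NACK:
                        return -1;      /* peer refused: retrying won't help */
                case REPLY_DEFER:
                        continue;       /* peer busy: try again */
                }
        }
        fprintf(stderr, "too many retries, giving up\n");
        return -1;
}

int main(void)
{
        return do_transfer(busy_then_ok) ? 1 : 0;
}
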
index 89a65be8a3f364359edaf6add71b6259affa3cbd..31cd7e33e8208b8112ed7cb2b278e8cc8b84fc61 100644 (file)
@@ -696,20 +696,17 @@ int intel_wait_ring_buffer(struct drm_device *dev,
        drm_i915_private_t *dev_priv = dev->dev_private;
        u32 head;
 
-       head = intel_read_status_page(ring, 4);
-       if (head) {
-               ring->head = head & HEAD_ADDR;
-               ring->space = ring->head - (ring->tail + 8);
-               if (ring->space < 0)
-                       ring->space += ring->size;
-               if (ring->space >= n)
-                       return 0;
-       }
-
        trace_i915_ring_wait_begin (dev);
        end = jiffies + 3 * HZ;
        do {
-               ring->head = I915_READ_HEAD(ring) & HEAD_ADDR;
+               /* If the reported head position has wrapped or hasn't advanced,
+                * fallback to the slow and accurate path.
+                */
+               head = intel_read_status_page(ring, 4);
+               if (head < ring->actual_head)
+                       head = I915_READ_HEAD(ring);
+               ring->actual_head = head;
+               ring->head = head & HEAD_ADDR;
                ring->space = ring->head - (ring->tail + 8);
                if (ring->space < 0)
                        ring->space += ring->size;
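
Tracking actual_head lets the driver notice when the cheap status-page snapshot has gone backwards (wrapped or gone stale) and fall back to the slower, accurate MMIO read. The same monotonic-check idea in a standalone sketch:

#include <stdio.h>

static unsigned int hw_head;            /* authoritative but slow to read */
static unsigned int snap_head;          /* cheap snapshot, may lag or wrap */

static unsigned int read_snapshot(void) { return snap_head; }
static unsigned int read_hw(void)       { return hw_head; }

/* Prefer the cheap read; if it moved backwards relative to the last value
 * we trusted, take the slow accurate read instead. */
static unsigned int current_head(unsigned int *last_seen)
{
        unsigned int head = read_snapshot();

        if (head < *last_seen)
                head = read_hw();
        *last_seen = head;
        return head;
}

int main(void)
{
        unsigned int last = 0;

        hw_head = snap_head = 100;
        printf("%u\n", current_head(&last));    /* 100, via snapshot */
        snap_head = 40;                         /* snapshot went backwards */
        printf("%u\n", current_head(&last));    /* 100, via hw fallback */
        return 0;
}
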
index 3126c2681983e21ba729d9ca599d104f4f1323de..d2cd0f1efeedfdec3102f0436dc70d0d47a2d52d 100644 (file)
@@ -30,8 +30,9 @@ struct  intel_ring_buffer {
        struct          drm_device *dev;
        struct          drm_gem_object *gem_object;
 
-       unsigned int    head;
-       unsigned int    tail;
+       u32             actual_head;
+       u32             head;
+       u32             tail;
        int             space;
        struct intel_hw_status_page status_page;
 
index d97e6cb52d34a102705d9ab59f030cb551333e16..6bc42fa2a6ecc152d796611a3e66311b98b3de9f 100644 (file)
@@ -1908,9 +1908,12 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,
                speed = mapping->i2c_speed;
        }
 
-       sdvo->i2c = &dev_priv->gmbus[pin].adapter;
-       intel_gmbus_set_speed(sdvo->i2c, speed);
-       intel_gmbus_force_bit(sdvo->i2c, true);
+       if (pin < GMBUS_NUM_PORTS) {
+               sdvo->i2c = &dev_priv->gmbus[pin].adapter;
+               intel_gmbus_set_speed(sdvo->i2c, speed);
+               intel_gmbus_force_bit(sdvo->i2c, true);
+       } else
+               sdvo->i2c = &dev_priv->gmbus[GMBUS_PORT_DPB].adapter;
 }
 
 static bool
@@ -2037,13 +2040,14 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
                                           SDVO_COLORIMETRY_RGB256);
                connector->connector_type = DRM_MODE_CONNECTOR_HDMIA;
 
-               intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
                intel_sdvo->is_hdmi = true;
        }
        intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
                                       (1 << INTEL_ANALOG_CLONE_BIT));
 
        intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo);
+       if (intel_sdvo->is_hdmi)
+               intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
 
        return true;
 }
index df2b6f2b35f893d00b475147f8d995a7b3c15bc2..9fbabaa6ee448bb665766a34dc944c694e69eaa3 100644 (file)
@@ -253,7 +253,8 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
        case DRM_MODE_DPMS_SUSPEND:
        case DRM_MODE_DPMS_OFF:
                drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
-               atombios_blank_crtc(crtc, ATOM_ENABLE);
+               if (radeon_crtc->enabled)
+                       atombios_blank_crtc(crtc, ATOM_ENABLE);
                if (ASIC_IS_DCE3(rdev))
                        atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
                atombios_enable_crtc(crtc, ATOM_DISABLE);
@@ -530,7 +531,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                                        dp_clock = dig_connector->dp_clock;
                                }
                        }
-
+#if 0 /* doesn't work properly on some laptops */
                        /* use recommended ref_div for ss */
                        if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
                                if (ss_enabled) {
@@ -540,7 +541,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                                        }
                                }
                        }
-
+#endif
                        if (ASIC_IS_AVIVO(rdev)) {
                                /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
                                if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
index 4dc5b4714c5a6ae1919246ac55ed32d3bacc44ac..7b337c361a1240f12afc0105bc55d0867172b151 100644 (file)
@@ -748,6 +748,8 @@ void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev)
        unsigned i;
        u32 tmp;
 
+       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+
        WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
@@ -1922,7 +1924,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
 static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
 {
        struct evergreen_mc_save save;
-       u32 srbm_reset = 0;
        u32 grbm_reset = 0;
 
        dev_info(rdev->dev, "GPU softreset \n");
@@ -1961,16 +1962,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
        udelay(50);
        WREG32(GRBM_SOFT_RESET, 0);
        (void)RREG32(GRBM_SOFT_RESET);
-
-       /* reset all the system blocks */
-       srbm_reset = SRBM_SOFT_RESET_ALL_MASK;
-
-       dev_info(rdev->dev, "  SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
-       WREG32(SRBM_SOFT_RESET, srbm_reset);
-       (void)RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       (void)RREG32(SRBM_SOFT_RESET);
        /* Wait a little for things to settle down */
        udelay(50);
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
@@ -1981,10 +1972,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
-       /* After reset we need to reinit the asic as GPU often endup in an
-        * incoherent state.
-        */
-       atom_asic_init(rdev->mode_info.atom_context);
        evergreen_mc_resume(rdev, &save);
        return 0;
 }
@@ -2596,6 +2583,11 @@ int evergreen_resume(struct radeon_device *rdev)
 {
        int r;
 
+       /* reset the asic, the gfx blocks are often in a bad state
+        * after the driver is unloaded or after a resume
+        */
+       if (radeon_asic_reset(rdev))
+               dev_warn(rdev->dev, "GPU reset failed !\n");
        /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
         * posting will perform necessary task to bring back GPU into good
         * shape.
@@ -2712,6 +2704,11 @@ int evergreen_init(struct radeon_device *rdev)
        r = radeon_atombios_init(rdev);
        if (r)
                return r;
+       /* reset the asic, the gfx blocks are often in a bad state
+        * after the driver is unloaded or after a resume
+        */
+       if (radeon_asic_reset(rdev))
+               dev_warn(rdev->dev, "GPU reset failed !\n");
        /* Post card if necessary */
        if (!evergreen_card_posted(rdev)) {
                if (!rdev->bios) {
index 113c70cc8b3930eba7170a94f2c75b9f62e07c1d..a73b53c44359c59a617c6c7aed5a8510a40c39d2 100644 (file)
 #define        HDP_NONSURFACE_BASE                             0x2C04
 #define        HDP_NONSURFACE_INFO                             0x2C08
 #define        HDP_NONSURFACE_SIZE                             0x2C0C
+#define HDP_MEM_COHERENCY_FLUSH_CNTL                   0x5480
 #define HDP_REG_COHERENCY_FLUSH_CNTL                   0x54A0
 #define        HDP_TILING_CONFIG                               0x2F3C
 
index 4d7a2e1bdb90e498138eea542e22884c9e36eb0d..9c92db7c896b6719edfaef92d1484ca300d2cb95 100644 (file)
@@ -1342,13 +1342,19 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
        u32 srbm_status;
        u32 grbm_status;
        u32 grbm_status2;
+       struct r100_gpu_lockup *lockup;
        int r;
 
+       if (rdev->family >= CHIP_RV770)
+               lockup = &rdev->config.rv770.lockup;
+       else
+               lockup = &rdev->config.r600.lockup;
+
        srbm_status = RREG32(R_000E50_SRBM_STATUS);
        grbm_status = RREG32(R_008010_GRBM_STATUS);
        grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
        if (!G_008010_GUI_ACTIVE(grbm_status)) {
-               r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+               r100_gpu_lockup_update(lockup, &rdev->cp);
                return false;
        }
        /* force CP activities */
@@ -1360,7 +1366,7 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
                radeon_ring_unlock_commit(rdev);
        }
        rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
-       return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+       return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
 }
 
 int r600_asic_reset(struct radeon_device *rdev)
index 0f90fc3482ce205df9991bcd1d7c1170abd912ca..7831e0890210c6b4b7a80f09da51d7b76cc80511 100644 (file)
@@ -315,11 +315,10 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
                if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
                        /* the initial DDX does bad things with the CB size occasionally */
                        /* it rounds up height too far for slice tile max but the BO is smaller */
-                       tmp = (height - 7) * 8 * bpe;
-                       if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
-                               dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
-                               return -EINVAL;
-                       }
+                       /* r600c,g also seem to flush at bad times in some apps resulting in
+                        * bogus values here. So for linear just allow anything to avoid breaking
+                        * broken userspace.
+                        */
                } else {
                        dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
                        return -EINVAL;
index e12e79326cb115a490ed5d94e0eda2a84ee400db..501966a13f48094a65cfeb17460fa16aeb62a308 100644 (file)
@@ -910,11 +910,6 @@ int radeon_resume_kms(struct drm_device *dev)
        radeon_pm_resume(rdev);
        radeon_restore_bios_scratch_regs(rdev);
 
-       /* turn on display hw */
-       list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-       }
-
        radeon_fbdev_set_suspend(rdev, 0);
        release_console_sem();
 
@@ -922,6 +917,10 @@ int radeon_resume_kms(struct drm_device *dev)
        radeon_hpd_init(rdev);
        /* blat the mode back in */
        drm_helper_resume_force_mode(dev);
+       /* turn on display hw */
+       list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+       }
        return 0;
 }
 
index 88e4ea925900ef3aecf71aa17744d3f9d0d8c445..60e689f2d048b71aa85b2a95d759fab06352250c 100644 (file)
@@ -232,9 +232,28 @@ static struct drm_driver driver_old = {
 
 static struct drm_driver kms_driver;
 
+static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+       struct apertures_struct *ap;
+       bool primary = false;
+
+       ap = alloc_apertures(1);
+       ap->ranges[0].base = pci_resource_start(pdev, 0);
+       ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+#ifdef CONFIG_X86
+       primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+#endif
+       remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
+       kfree(ap);
+}
+
 static int __devinit
 radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+       /* Get rid of things like offb */
+       radeon_kick_out_firmware_fb(pdev);
+
        return drm_get_pci_dev(pdev, ent, &kms_driver);
 }
 
index efa211898fe60204a424b98cdb9295ea2ee99356..6abea32be5e83b80346f5c79b0a966c66c194a7c 100644 (file)
@@ -245,7 +245,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
                goto out_unref;
        }
        info->apertures->ranges[0].base = rdev->ddev->mode_config.fb_base;
-       info->apertures->ranges[0].size = rdev->mc.real_vram_size;
+       info->apertures->ranges[0].size = rdev->mc.aper_size;
 
        info->fix.mmio_start = 0;
        info->fix.mmio_len = 0;
index 05248f2d7581cd02dc191185b083bd30977ec5ba..92b42db43bcfd9e5706c41bfb9049a26fd967b93 100644 (file)
@@ -234,7 +234,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
        attr->index = channel;
        attr->dev_attr.attr.name  = attrs->in_name;
        attr->dev_attr.attr.mode  = S_IRUGO;
-       attr->dev_attr.attr.owner = THIS_MODULE;
        attr->dev_attr.show = s3c_hwmon_ch_show;
 
        ret =  device_create_file(dev, &attr->dev_attr);
@@ -252,7 +251,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
                attr->index = channel;
                attr->dev_attr.attr.name  = attrs->label_name;
                attr->dev_attr.attr.mode  = S_IRUGO;
-               attr->dev_attr.attr.owner = THIS_MODULE;
                attr->dev_attr.show = s3c_hwmon_label_show;
 
                ret = device_create_file(dev, &attr->dev_attr);
index c131d58bcb50d818e05ee0464581bceedfdbf20f..56ac09d6c9308157d6acda8c99bdbdeef91aa771 100644 (file)
@@ -220,9 +220,8 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        kt_before = ktime_get_real();
 
        stop_critical_timings();
-#ifndef MODULE
        trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
-#endif
+       trace_cpu_idle((eax >> 4) + 1, cpu);
        if (!need_resched()) {
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
index bcc174e4f3b146db6772cea89c79ac9eb14ce80a..658e75f18d052b5a721ff2e6882109c945318d5b 100644 (file)
@@ -1900,6 +1900,7 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif,
                if (b3skb == NULL) {
                        dev_err(cs->dev, "%s: out of memory\n", __func__);
                        send_conf(iif, ap, skb, CAPI_MSGOSRESOURCEERR);
+                       kfree(b3cmsg);
                        return;
                }
                capi_cmsg2message(b3cmsg,
index 211e21f34bd57d5f6e2b079dcd22194713b3d5f5..d5a4ade88991eb690e053a6a4218de0512ad9387 100644 (file)
@@ -267,7 +267,7 @@ void led_blink_set(struct led_classdev *led_cdev,
                   unsigned long *delay_off)
 {
        if (led_cdev->blink_set &&
-           led_cdev->blink_set(led_cdev, delay_on, delay_off))
+           !led_cdev->blink_set(led_cdev, delay_on, delay_off))
                return;
 
        /* blink with 1 Hz as default if nothing specified */
index 1b7adabbcee99d4affc24f48e335f33cb3dd409e..6da955dfef48989a1773792f09bf1715326a86b5 100644 (file)
@@ -26,8 +26,8 @@ static struct ir_scancode rc6_mce[] = {
 
        { 0x800f040a, KEY_DELETE },
        { 0x800f040b, KEY_ENTER },
-       { 0x800f040c, KEY_POWER },
-       { 0x800f040d, KEY_PROG1 },              /* Windows MCE button */
+       { 0x800f040c, KEY_POWER },              /* PC Power */
+       { 0x800f040d, KEY_PROG1 },              /* Windows MCE button */
        { 0x800f040e, KEY_MUTE },
        { 0x800f040f, KEY_INFO },
 
@@ -56,31 +56,32 @@ static struct ir_scancode rc6_mce[] = {
        { 0x800f0422, KEY_OK },
        { 0x800f0423, KEY_EXIT },
        { 0x800f0424, KEY_DVD },
-       { 0x800f0425, KEY_TUNER },              /* LiveTV */
-       { 0x800f0426, KEY_EPG },                /* Guide */
-       { 0x800f0427, KEY_ZOOM },               /* Aspect */
+       { 0x800f0425, KEY_TUNER },              /* LiveTV */
+       { 0x800f0426, KEY_EPG },                /* Guide */
+       { 0x800f0427, KEY_ZOOM },               /* Aspect */
 
        { 0x800f043a, KEY_BRIGHTNESSUP },
 
        { 0x800f0446, KEY_TV },
-       { 0x800f0447, KEY_AUDIO },              /* My Music */
-       { 0x800f0448, KEY_PVR },                /* RecordedTV */
+       { 0x800f0447, KEY_AUDIO },              /* My Music */
+       { 0x800f0448, KEY_PVR },                /* RecordedTV */
        { 0x800f0449, KEY_CAMERA },
        { 0x800f044a, KEY_VIDEO },
        { 0x800f044c, KEY_LANGUAGE },
        { 0x800f044d, KEY_TITLE },
-       { 0x800f044e, KEY_PRINT },      /* Print - HP OEM version of remote */
+       { 0x800f044e, KEY_PRINT },      /* Print - HP OEM version of remote */
 
        { 0x800f0450, KEY_RADIO },
 
-       { 0x800f045a, KEY_SUBTITLE },           /* Caption/Teletext */
+       { 0x800f045a, KEY_SUBTITLE },           /* Caption/Teletext */
        { 0x800f045b, KEY_RED },
        { 0x800f045c, KEY_GREEN },
        { 0x800f045d, KEY_YELLOW },
        { 0x800f045e, KEY_BLUE },
 
+       { 0x800f0465, KEY_POWER2 },     /* TV Power */
        { 0x800f046e, KEY_PLAYPAUSE },
-       { 0x800f046f, KEY_MEDIA },      /* Start media application (NEW) */
+       { 0x800f046f, KEY_MEDIA },      /* Start media application (NEW) */
 
        { 0x800f0480, KEY_BRIGHTNESSDOWN },
        { 0x800f0481, KEY_PLAYPAUSE },
index 8418b14ee4d2244ee3b6528549d0d22f35c85159..756656e17bddb035dbff24529155a86e82d19b3c 100644 (file)
@@ -522,10 +522,8 @@ unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait)
 
        dev_dbg(ir->d.dev, LOGHEAD "poll called\n", ir->d.name, ir->d.minor);
 
-       if (!ir->attached) {
-               mutex_unlock(&ir->irctl_lock);
+       if (!ir->attached)
                return POLLERR;
-       }
 
        poll_wait(file, &ir->buf->wait_poll, wait);
 
@@ -649,18 +647,18 @@ ssize_t lirc_dev_fop_read(struct file *file,
        if (!buf)
                return -ENOMEM;
 
-       if (mutex_lock_interruptible(&ir->irctl_lock))
-               return -ERESTARTSYS;
+       if (mutex_lock_interruptible(&ir->irctl_lock)) {
+               ret = -ERESTARTSYS;
+               goto out_unlocked;
+       }
        if (!ir->attached) {
-               mutex_unlock(&ir->irctl_lock);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto out_locked;
        }
 
        if (length % ir->chunk_size) {
-               dev_dbg(ir->d.dev, LOGHEAD "read result = -EINVAL\n",
-                       ir->d.name, ir->d.minor);
-               mutex_unlock(&ir->irctl_lock);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_locked;
        }
 
        /*
@@ -711,18 +709,23 @@ ssize_t lirc_dev_fop_read(struct file *file,
                        lirc_buffer_read(ir->buf, buf);
                        ret = copy_to_user((void *)buffer+written, buf,
                                           ir->buf->chunk_size);
-                       written += ir->buf->chunk_size;
+                       if (!ret)
+                               written += ir->buf->chunk_size;
+                       else
+                               ret = -EFAULT;
                }
        }
 
        remove_wait_queue(&ir->buf->wait_poll, &wait);
        set_current_state(TASK_RUNNING);
+
+out_locked:
        mutex_unlock(&ir->irctl_lock);
 
 out_unlocked:
        kfree(buf);
        dev_dbg(ir->d.dev, LOGHEAD "read result = %s (%d)\n",
-               ir->d.name, ir->d.minor, ret ? "-EFAULT" : "OK", ret);
+               ir->d.name, ir->d.minor, ret ? "<fail>" : "<ok>", ret);
 
        return ret ? ret : written;
 }
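
The reworked read path funnels every failure through out_locked/out_unlocked, so the mutex and the temporary buffer are each released exactly once, and the copy_to_user() result is finally checked instead of blindly counting the chunk as written. The two-tier unwind, schematically (illustrative stubs, not the lirc code):

#include <stdlib.h>
#include <string.h>

static int lock_held;

static int try_lock(void) { lock_held = 1; return 0; }
static void unlock(void)  { lock_held = 0; }

/* Jump to the label matching what has been acquired so far; resources
 * are released in reverse order of acquisition. */
static int read_chunk(char *dst, size_t len, size_t chunk)
{
        char *buf;
        int ret = 0;

        buf = malloc(chunk);            /* acquired first, freed last */
        if (!buf)
                return -1;

        if (try_lock()) {
                ret = -2;
                goto out_unlocked;      /* only the buffer to undo */
        }
        if (len % chunk) {              /* reject misaligned reads */
                ret = -3;
                goto out_locked;
        }
        memset(buf, 'x', chunk);
        memcpy(dst, buf, chunk);        /* the kernel checks copy_to_user() here */

out_locked:
        unlock();
out_unlocked:
        free(buf);
        return ret;
}
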
index 9dce684fd23113d13a3d445995983089cff56c60..392ca24132dafd5456e905fa61c216d566703976 100644 (file)
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/usb.h>
 #include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
 #include <media/ir-core.h>
-#include <media/ir-common.h>
 
 #define DRIVER_VERSION "1.91"
 #define DRIVER_AUTHOR  "Jarod Wilson <jarod@wilsonet.com>"
@@ -49,6 +49,7 @@
 #define USB_BUFLEN             32 /* USB reception buffer length */
 #define USB_CTRL_MSG_SZ                2  /* Size of usb ctrl msg on gen1 hw */
 #define MCE_G1_INIT_MSGS       40 /* Init messages on gen1 hw to throw out */
+#define MS_TO_NS(msec)         ((msec) * 1000000)
 
 /* MCE constants */
 #define MCE_CMDBUF_SIZE                384  /* MCE Command buffer length */
@@ -74,6 +75,7 @@
 #define MCE_PACKET_LENGTH_MASK 0x1f /* Packet length mask */
 
 /* Sub-commands, which follow MCE_COMMAND_HEADER or MCE_HW_CMD_HEADER */
+#define MCE_CMD_SIG_END                0x01    /* End of signal */
 #define MCE_CMD_PING           0x03    /* Ping device */
 #define MCE_CMD_UNKNOWN                0x04    /* Unknown */
 #define MCE_CMD_UNKNOWN2       0x05    /* Unknown */
@@ -91,6 +93,7 @@
 #define MCE_CMD_G_TXMASK       0x13    /* Set TX port bitmask */
 #define MCE_CMD_S_RXSENSOR     0x14    /* Set RX sensor (std/learning) */
 #define MCE_CMD_G_RXSENSOR     0x15    /* Get RX sensor (std/learning) */
+#define MCE_RSP_PULSE_COUNT    0x15    /* RX pulse count (only if learning) */
 #define MCE_CMD_TX_PORTS       0x16    /* Get number of TX ports */
 #define MCE_CMD_G_WAKESRC      0x17    /* Get wake source */
 #define MCE_CMD_UNKNOWN7       0x18    /* Unknown */
@@ -146,14 +149,16 @@ enum mceusb_model_type {
        MCE_GEN3,
        MCE_GEN2_TX_INV,
        POLARIS_EVK,
+       CX_HYBRID_TV,
 };
 
 struct mceusb_model {
        u32 mce_gen1:1;
        u32 mce_gen2:1;
        u32 mce_gen3:1;
-       u32 tx_mask_inverted:1;
+       u32 tx_mask_normal:1;
        u32 is_polaris:1;
+       u32 no_tx:1;
 
        const char *rc_map;     /* Allows specifying a per-board map */
        const char *name;       /* per-board name */
@@ -162,18 +167,18 @@ struct mceusb_model {
 static const struct mceusb_model mceusb_model[] = {
        [MCE_GEN1] = {
                .mce_gen1 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [MCE_GEN2] = {
                .mce_gen2 = 1,
        },
        [MCE_GEN2_TX_INV] = {
                .mce_gen2 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [MCE_GEN3] = {
                .mce_gen3 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [POLARIS_EVK] = {
                .is_polaris = 1,
@@ -183,7 +188,12 @@ static const struct mceusb_model mceusb_model[] = {
                 * to allow testing it
                 */
                .rc_map = RC_MAP_RC5_HAUPPAUGE_NEW,
-               .name = "cx231xx MCE IR",
+               .name = "Conexant Hybrid TV (cx231xx) MCE IR",
+       },
+       [CX_HYBRID_TV] = {
+               .is_polaris = 1,
+               .no_tx = 1, /* tx isn't wired up at all */
+               .name = "Conexant Hybrid TV (cx231xx) MCE IR",
        },
 };
 
@@ -273,6 +283,8 @@ static struct usb_device_id mceusb_dev_table[] = {
        { USB_DEVICE(VENDOR_FORMOSA, 0xe03c) },
        /* Formosa Industrial Computing */
        { USB_DEVICE(VENDOR_FORMOSA, 0xe03e) },
+       /* Fintek eHome Infrared Transceiver (HP branded) */
+       { USB_DEVICE(VENDOR_FINTEK, 0x5168) },
        /* Fintek eHome Infrared Transceiver */
        { USB_DEVICE(VENDOR_FINTEK, 0x0602) },
        /* Fintek eHome Infrared Transceiver (in the AOpen MP45) */
@@ -292,9 +304,12 @@ static struct usb_device_id mceusb_dev_table[] = {
        { USB_DEVICE(VENDOR_NORTHSTAR, 0xe004) },
        /* TiVo PC IR Receiver */
        { USB_DEVICE(VENDOR_TIVO, 0x2000) },
-       /* Conexant SDK */
+       /* Conexant Hybrid TV "Shelby" Polaris SDK */
        { USB_DEVICE(VENDOR_CONEXANT, 0x58a1),
          .driver_info = POLARIS_EVK },
+       /* Conexant Hybrid TV RDU253S Polaris */
+       { USB_DEVICE(VENDOR_CONEXANT, 0x58a5),
+         .driver_info = CX_HYBRID_TV },
        /* Terminating entry */
        { }
 };
@@ -303,7 +318,10 @@ static struct usb_device_id mceusb_dev_table[] = {
 struct mceusb_dev {
        /* ir-core bits */
        struct ir_dev_props *props;
-       struct ir_raw_event rawir;
+
+       /* optional features we can enable */
+       bool carrier_report_enabled;
+       bool learning_enabled;
 
        /* core device bits */
        struct device *dev;
@@ -318,6 +336,8 @@ struct mceusb_dev {
        /* buffers and dma */
        unsigned char *buf_in;
        unsigned int len_in;
+       dma_addr_t dma_in;
+       dma_addr_t dma_out;
 
        enum {
                CMD_HEADER = 0,
@@ -325,15 +345,14 @@ struct mceusb_dev {
                CMD_DATA,
                PARSE_IRDATA,
        } parser_state;
-       u8 cmd, rem;            /* Remaining IR data bytes in packet */
 
-       dma_addr_t dma_in;
-       dma_addr_t dma_out;
+       u8 cmd, rem;            /* Remaining IR data bytes in packet */
 
        struct {
                u32 connected:1;
-               u32 tx_mask_inverted:1;
+               u32 tx_mask_normal:1;
                u32 microsoft_gen1:1;
+               u32 no_tx:1;
        } flags;
 
        /* transmit support */
@@ -408,9 +427,10 @@ static int mceusb_cmdsize(u8 cmd, u8 subcmd)
                case MCE_CMD_UNKNOWN:
                case MCE_CMD_S_CARRIER:
                case MCE_CMD_S_TIMEOUT:
-               case MCE_CMD_G_RXSENSOR:
+               case MCE_RSP_PULSE_COUNT:
                        datasize = 2;
                        break;
+               case MCE_CMD_SIG_END:
                case MCE_CMD_S_TXMASK:
                case MCE_CMD_S_RXSENSOR:
                        datasize = 1;
@@ -433,7 +453,7 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                return;
 
        /* skip meaningless 0xb1 0x60 header bytes on orig receiver */
-       if (ir->flags.microsoft_gen1 && !out)
+       if (ir->flags.microsoft_gen1 && !out && !offset)
                skip = 2;
 
        if (len <= skip)
@@ -491,6 +511,9 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                break;
        case MCE_COMMAND_HEADER:
                switch (subcmd) {
+               case MCE_CMD_SIG_END:
+                       dev_info(dev, "End of signal\n");
+                       break;
                case MCE_CMD_PING:
                        dev_info(dev, "Ping\n");
                        break;
@@ -525,10 +548,11 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                                 inout, data1 == 0x02 ? "short" : "long");
                        break;
                case MCE_CMD_G_RXSENSOR:
-                       if (len == 2)
+               /* aka MCE_RSP_PULSE_COUNT */
+                       if (out)
                                dev_info(dev, "Get receive sensor\n");
-                       else
-                               dev_info(dev, "Received pulse count is %d\n",
+                       else if (ir->learning_enabled)
+                               dev_info(dev, "RX pulse count: %d\n",
                                         ((data1 << 8) | data2));
                        break;
                case MCE_RSP_CMD_INVALID:
@@ -724,16 +748,16 @@ out:
        return ret ? ret : n;
 }
 
-/* Sets active IR outputs -- mce devices typically (all?) have two */
+/* Sets active IR outputs -- mce devices typically have two */
 static int mceusb_set_tx_mask(void *priv, u32 mask)
 {
        struct mceusb_dev *ir = priv;
 
-       if (ir->flags.tx_mask_inverted)
+       if (ir->flags.tx_mask_normal)
+               ir->tx_mask = mask;
+       else
                ir->tx_mask = (mask != MCE_DEFAULT_TX_MASK ?
                                mask ^ MCE_DEFAULT_TX_MASK : mask) << 1;
-       else
-               ir->tx_mask = mask;
 
        return 0;
 }
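
The rename from tx_mask_inverted to tx_mask_normal flips the sense of the flag, not the behavior: boards whose hardware takes the mask verbatim now carry the flag, and everything else keeps the legacy invert-and-shift. A sketch of the two conversions (hypothetical helper; constant per the driver):

	static u32 mce_hw_tx_mask(u32 mask, bool tx_mask_normal)
	{
		if (tx_mask_normal)
			return mask;		/* hardware takes the mask as-is */
		/* legacy parts: invert against the default, then shift up */
		return (mask != MCE_DEFAULT_TX_MASK ?
			mask ^ MCE_DEFAULT_TX_MASK : mask) << 1;
	}
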
@@ -752,7 +776,7 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
 
                if (carrier == 0) {
                        ir->carrier = carrier;
-                       cmdbuf[2] = 0x01;
+                       cmdbuf[2] = MCE_CMD_SIG_END;
                        cmdbuf[3] = MCE_IRDATA_TRAILER;
                        dev_dbg(ir->dev, "%s: disabling carrier "
                                "modulation\n", __func__);
@@ -782,6 +806,34 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
        return carrier;
 }
 
+/*
+ * Most of the command bits we receive from the hardware only generate
+ * debug spew, but a few carry useful state that we want to store for
+ * later use.
+ */
+static void mceusb_handle_command(struct mceusb_dev *ir, int index)
+{
+       u8 hi = ir->buf_in[index + 1] & 0xff;
+       u8 lo = ir->buf_in[index + 2] & 0xff;
+
+       switch (ir->buf_in[index]) {
+       /* 2-byte return value commands */
+       case MCE_CMD_S_TIMEOUT:
+               ir->props->timeout = MS_TO_NS((hi << 8 | lo) / 2);
+               break;
+
+       /* 1-byte return value commands */
+       case MCE_CMD_S_TXMASK:
+               ir->tx_mask = hi;
+               break;
+       case MCE_CMD_S_RXSENSOR:
+               ir->learning_enabled = (hi == 0x02);
+               break;
+       default:
+               break;
+       }
+}
+
 static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
 {
        DEFINE_IR_RAW_EVENT(rawir);
@@ -791,39 +843,30 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
        if (ir->flags.microsoft_gen1)
                i = 2;
 
+       /* if there's no data, just return now */
+       if (buf_len <= i)
+               return;
+
        for (; i < buf_len; i++) {
                switch (ir->parser_state) {
                case SUBCMD:
                        ir->rem = mceusb_cmdsize(ir->cmd, ir->buf_in[i]);
                        mceusb_dev_printdata(ir, ir->buf_in, i - 1,
                                             ir->rem + 2, false);
+                       mceusb_handle_command(ir, i);
                        ir->parser_state = CMD_DATA;
                        break;
                case PARSE_IRDATA:
                        ir->rem--;
                        rawir.pulse = ((ir->buf_in[i] & MCE_PULSE_BIT) != 0);
                        rawir.duration = (ir->buf_in[i] & MCE_PULSE_MASK)
-                                        * MCE_TIME_UNIT * 1000;
-
-                       if ((ir->buf_in[i] & MCE_PULSE_MASK) == 0x7f) {
-                               if (ir->rawir.pulse == rawir.pulse) {
-                                       ir->rawir.duration += rawir.duration;
-                               } else {
-                                       ir->rawir.duration = rawir.duration;
-                                       ir->rawir.pulse = rawir.pulse;
-                               }
-                               if (ir->rem)
-                                       break;
-                       }
-                       rawir.duration += ir->rawir.duration;
-                       ir->rawir.duration = 0;
-                       ir->rawir.pulse = rawir.pulse;
+                                        * MS_TO_NS(MCE_TIME_UNIT);
 
                        dev_dbg(ir->dev, "Storing %s with duration %d\n",
                                rawir.pulse ? "pulse" : "space",
                                rawir.duration);
 
-                       ir_raw_event_store(ir->idev, &rawir);
+                       ir_raw_event_store_with_filter(ir->idev, &rawir);
                        break;
                case CMD_DATA:
                        ir->rem--;
@@ -839,17 +882,10 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
                                continue;
                        }
                        ir->rem = (ir->cmd & MCE_PACKET_LENGTH_MASK);
-                       mceusb_dev_printdata(ir, ir->buf_in, i, ir->rem + 1, false);
-                       if (ir->rem) {
+                       mceusb_dev_printdata(ir, ir->buf_in,
+                                            i, ir->rem + 1, false);
+                       if (ir->rem)
                                ir->parser_state = PARSE_IRDATA;
-                               break;
-                       }
-                       /*
-                        * a package with len=0 (e. g. 0x80) means end of
-                        * data. We could use it to do the call to
-                        * ir_raw_event_handle(). For now, we don't need to
-                        * use it.
-                        */
                        break;
                }
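
The deleted ir->rawir bookkeeping merged consecutive samples of the same polarity by hand; ir_raw_event_store_with_filter() now does that centrally in ir-core (and, by my reading of ir-core, also uses props->timeout to flag idle periods). The old logic, condensed for reference:

	/* if the new sample extends the previous one, accumulate it */
	if (prev.pulse == rawir.pulse)
		prev.duration += rawir.duration;
	else {
		ir_raw_event_store(idev, &prev);	/* emit merged sample */
		prev = rawir;
	}
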
 
@@ -984,9 +1020,11 @@ static void mceusb_get_parameters(struct mceusb_dev *ir)
        mce_async_out(ir, GET_CARRIER_FREQ, sizeof(GET_CARRIER_FREQ));
        mce_sync_in(ir, NULL, maxp);
 
-       /* get the transmitter bitmask */
-       mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK));
-       mce_sync_in(ir, NULL, maxp);
+       if (!ir->flags.no_tx) {
+               /* get the transmitter bitmask */
+               mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK));
+               mce_sync_in(ir, NULL, maxp);
+       }
 
        /* get receiver timeout value */
        mce_async_out(ir, GET_RX_TIMEOUT, sizeof(GET_RX_TIMEOUT));
@@ -1035,12 +1073,18 @@ static struct input_dev *mceusb_init_input_dev(struct mceusb_dev *ir)
        props->priv = ir;
        props->driver_type = RC_DRIVER_IR_RAW;
        props->allowed_protos = IR_TYPE_ALL;
-       props->s_tx_mask = mceusb_set_tx_mask;
-       props->s_tx_carrier = mceusb_set_tx_carrier;
-       props->tx_ir = mceusb_tx_ir;
+       props->timeout = MS_TO_NS(1000);
+       if (!ir->flags.no_tx) {
+               props->s_tx_mask = mceusb_set_tx_mask;
+               props->s_tx_carrier = mceusb_set_tx_carrier;
+               props->tx_ir = mceusb_tx_ir;
+       }
 
        ir->props = props;
 
+       usb_to_input_id(ir->usbdev, &idev->id);
+       idev->dev.parent = ir->dev;
+
        if (mceusb_model[ir->model].rc_map)
                rc_map = mceusb_model[ir->model].rc_map;
 
@@ -1074,16 +1118,16 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
        enum mceusb_model_type model = id->driver_info;
        bool is_gen3;
        bool is_microsoft_gen1;
-       bool tx_mask_inverted;
+       bool tx_mask_normal;
        bool is_polaris;
 
-       dev_dbg(&intf->dev, "%s called\n", __func__);
+       dev_dbg(&intf->dev, "%s called\n", __func__);
 
        idesc  = intf->cur_altsetting;
 
        is_gen3 = mceusb_model[model].mce_gen3;
        is_microsoft_gen1 = mceusb_model[model].mce_gen1;
-       tx_mask_inverted = mceusb_model[model].tx_mask_inverted;
+       tx_mask_normal = mceusb_model[model].tx_mask_normal;
        is_polaris = mceusb_model[model].is_polaris;
 
        if (is_polaris) {
@@ -1107,7 +1151,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
                        ep_in = ep;
                        ep_in->bmAttributes = USB_ENDPOINT_XFER_INT;
                        ep_in->bInterval = 1;
-                       dev_dbg(&intf->dev, "acceptable inbound endpoint "
+                       dev_dbg(&intf->dev, "acceptable inbound endpoint "
                                "found\n");
                }
 
@@ -1122,12 +1166,12 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
                        ep_out = ep;
                        ep_out->bmAttributes = USB_ENDPOINT_XFER_INT;
                        ep_out->bInterval = 1;
-                       dev_dbg(&intf->dev, "acceptable outbound endpoint "
+                       dev_dbg(&intf->dev, "acceptable outbound endpoint "
                                "found\n");
                }
        }
        if (ep_in == NULL) {
-               dev_dbg(&intf->dev, "inbound and/or endpoint not found\n");
+               dev_dbg(&intf->dev, "inbound endpoint not found\n");
                return -ENODEV;
        }
 
@@ -1150,11 +1194,10 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
        ir->dev = &intf->dev;
        ir->len_in = maxp;
        ir->flags.microsoft_gen1 = is_microsoft_gen1;
-       ir->flags.tx_mask_inverted = tx_mask_inverted;
+       ir->flags.tx_mask_normal = tx_mask_normal;
+       ir->flags.no_tx = mceusb_model[model].no_tx;
        ir->model = model;
 
-       init_ir_raw_event(&ir->rawir);
-
        /* Saving usb interface data for use by the transmitter routine */
        ir->usb_ep_in = ep_in;
        ir->usb_ep_out = ep_out;
@@ -1191,7 +1234,8 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
 
        mceusb_get_parameters(ir);
 
-       mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK);
+       if (!ir->flags.no_tx)
+               mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK);
 
        usb_set_intfdata(intf, ir);
 
index 301be53aee857298c43855df887a90ece26a2d8c..acc729c79ceca10afb96b08b101095c1a3e1dc03 100644 (file)
@@ -603,6 +603,8 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
        count = nvt->pkts;
        nvt_dbg_verbose("Processing buffer of len %d", count);
 
+       init_ir_raw_event(&rawir);
+
        for (i = 0; i < count; i++) {
                nvt->pkts--;
                sample = nvt->buf[i];
@@ -643,11 +645,15 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
                 * indicates end of IR signal, but new data incoming. In both
                 * cases, it means we're ready to call ir_raw_event_handle
                 */
-               if (sample == BUF_PULSE_BIT || ((sample != BUF_LEN_MASK) &&
-                   (sample & BUF_REPEAT_MASK) == BUF_REPEAT_BYTE))
+               if ((sample == BUF_PULSE_BIT) && nvt->pkts) {
+                       nvt_dbg("Calling ir_raw_event_handle (signal end)\n");
                        ir_raw_event_handle(nvt->rdev);
+               }
        }
 
+       nvt_dbg("Calling ir_raw_event_handle (buffer empty)\n");
+       ir_raw_event_handle(nvt->rdev);
+
        if (nvt->pkts) {
                nvt_dbg("Odd, pkts should be 0 now... (it's %u)", nvt->pkts);
                nvt->pkts = 0;
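
Net effect of this hunk: the flush no longer hinges on spotting a repeat byte. ir_raw_event_handle() fires after each complete signal inside the buffer and once more when the buffer drains, so trailing samples are always delivered. Condensed control flow (sketch):

	for (i = 0; i < count; i++) {
		/* ... decode and store sample i ... */
		if (sample == BUF_PULSE_BIT && nvt->pkts)
			ir_raw_event_handle(nvt->rdev);	/* end of one signal */
	}
	ir_raw_event_handle(nvt->rdev);			/* buffer drained */
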
index 548381c35bfd1573bf5886b648100e0d06a63f11..3a20aef67d08f97f7f0b92b143a85a67bbe24304 100644 (file)
@@ -34,8 +34,9 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/usb.h>
 #include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
 #include <media/ir-core.h>
 
 #define DRIVER_VERSION "1.61"
@@ -140,7 +141,9 @@ static struct usb_driver streamzap_driver = {
 
 static void sz_push(struct streamzap_ir *sz, struct ir_raw_event rawir)
 {
-       ir_raw_event_store(sz->idev, &rawir);
+       dev_dbg(sz->dev, "Storing %s with duration %u us\n",
+               (rawir.pulse ? "pulse" : "space"), rawir.duration);
+       ir_raw_event_store_with_filter(sz->idev, &rawir);
 }
 
 static void sz_push_full_pulse(struct streamzap_ir *sz,
@@ -167,7 +170,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
                        rawir.duration *= 1000;
                        rawir.duration &= IR_MAX_DURATION;
                }
-               dev_dbg(sz->dev, "ls %u\n", rawir.duration);
                sz_push(sz, rawir);
 
                sz->idle = false;
@@ -180,7 +182,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
        sz->sum += rawir.duration;
        rawir.duration *= 1000;
        rawir.duration &= IR_MAX_DURATION;
-       dev_dbg(sz->dev, "p %u\n", rawir.duration);
        sz_push(sz, rawir);
 }
 
@@ -200,7 +201,6 @@ static void sz_push_full_space(struct streamzap_ir *sz,
        rawir.duration += SZ_RESOLUTION / 2;
        sz->sum += rawir.duration;
        rawir.duration *= 1000;
-       dev_dbg(sz->dev, "s %u\n", rawir.duration);
        sz_push(sz, rawir);
 }
 
@@ -221,8 +221,6 @@ static void streamzap_callback(struct urb *urb)
        struct streamzap_ir *sz;
        unsigned int i;
        int len;
-       static int timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
-                               IR_MAX_DURATION) | 0x03000000);
 
        if (!urb)
                return;
@@ -246,7 +244,7 @@ static void streamzap_callback(struct urb *urb)
 
        dev_dbg(sz->dev, "%s: received urb, len %d\n", __func__, len);
        for (i = 0; i < len; i++) {
-               dev_dbg(sz->dev, "sz idx %d: %x\n",
+               dev_dbg(sz->dev, "sz->buf_in[%d]: %x\n",
                        i, (unsigned char)sz->buf_in[i]);
                switch (sz->decoder_state) {
                case PulseSpace:
@@ -273,7 +271,7 @@ static void streamzap_callback(struct urb *urb)
                                DEFINE_IR_RAW_EVENT(rawir);
 
                                rawir.pulse = false;
-                               rawir.duration = timeout;
+                               rawir.duration = sz->props->timeout;
                                sz->idle = true;
                                if (sz->timeout_enabled)
                                        sz_push(sz, rawir);
@@ -335,6 +333,9 @@ static struct input_dev *streamzap_init_input_dev(struct streamzap_ir *sz)
 
        sz->props = props;
 
+       usb_to_input_id(sz->usbdev, &idev->id);
+       idev->dev.parent = sz->dev;
+
        ret = ir_input_register(idev, RC_MAP_STREAMZAP, props, DRIVER_NAME);
        if (ret < 0) {
                dev_err(dev, "remote input device register failed\n");
@@ -444,6 +445,8 @@ static int __devinit streamzap_probe(struct usb_interface *intf,
        sz->decoder_state = PulseSpace;
        /* FIXME: don't yet have a way to set this */
        sz->timeout_enabled = true;
+       sz->props->timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
+                               IR_MAX_DURATION) | 0x03000000);
        #if 0
        /* not yet supported, depends on patches from maxim */
        /* see also: LIRC_GET_REC_RESOLUTION and LIRC_SET_REC_TIMEOUT */
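
The timeout that used to hide in a function-static in streamzap_callback() is now set once at probe and carried in sz->props->timeout. A breakdown of the value; the 0x03000000 marker is my reading (it matches LIRC's LIRC_MODE2_TIMEOUT encoding):

	u32 timeout;

	timeout  = SZ_TIMEOUT * SZ_RESOLUTION * 1000;	/* ticks * us/tick -> ns */
	timeout &= IR_MAX_DURATION;			/* clamp to the core's max */
	timeout |= 0x03000000;				/* timeout-type marker */
	sz->props->timeout = timeout;
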
index dfb198d0415bc200b0152c1be105d722261653c5..f16461844c5c0063e281da22b8cd3e27fe6d28f4 100644 (file)
@@ -1989,8 +1989,23 @@ static int cx25840_probe(struct i2c_client *client,
        v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops,
                        V4L2_CID_HUE, -128, 127, 1, 0);
        if (!is_cx2583x(state)) {
-               default_volume = 228 - cx25840_read(client, 0x8d4);
-               default_volume = ((default_volume / 2) + 23) << 9;
+               default_volume = cx25840_read(client, 0x8d4);
+               /*
+                * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume
+                * scale mapping limits to avoid -ERANGE errors when
+                * initializing the volume control
+                */
+               if (default_volume > 228) {
+                       /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */
+                       default_volume = 228;
+                       cx25840_write(client, 0x8d4, 228);
+               } else if (default_volume < 20) {
+                       /* Top out at +8 dB, v4l2 vol range 0xfe00-0xffff */
+                       default_volume = 20;
+                       cx25840_write(client, 0x8d4, 20);
+               }
+               default_volume = (((228 - default_volume) >> 1) + 23) << 9;
 
                state->volume = v4l2_ctrl_new_std(&state->hdl,
                        &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME,
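
Plugging the clamp endpoints into the new mapping confirms the ranges quoted in the comments:

	/* default_volume = 228 (floor):  (((228 - 228) >> 1) + 23) << 9 = 0x2e00 */
	/* default_volume = 20 (ceiling): (((228 - 20) >> 1) + 23) << 9  = 0xfe00 */
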
index 4aaa47c0eabf54ea8e008a616b0dac531b423ce7..54b7fcd469a8a9b2db267e81ac163d0e579ce31b 100644 (file)
@@ -40,7 +40,6 @@
 #include <sound/control.h>
 #include <sound/initval.h>
 #include <sound/tlv.h>
-#include <media/wm8775.h>
 
 #include "cx88.h"
 #include "cx88-reg.h"
@@ -587,47 +586,26 @@ static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
        int left, right, v, b;
        int changed = 0;
        u32 old;
-       struct v4l2_control client_ctl;
-
-       /* Pass volume & balance onto any WM8775 */
-       if (value->value.integer.value[0] >= value->value.integer.value[1]) {
-               v = value->value.integer.value[0] << 10;
-               b = value->value.integer.value[0] ?
-                       (0x8000 * value->value.integer.value[1]) / value->value.integer.value[0] :
-                       0x8000;
-       } else {
-               v = value->value.integer.value[1] << 10;
-               b = value->value.integer.value[1] ?
-               0xffff - (0x8000 * value->value.integer.value[0]) / value->value.integer.value[1] :
-               0x8000;
-       }
-       client_ctl.value = v;
-       client_ctl.id = V4L2_CID_AUDIO_VOLUME;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-       client_ctl.value = b;
-       client_ctl.id = V4L2_CID_AUDIO_BALANCE;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
 
        left = value->value.integer.value[0] & 0x3f;
        right = value->value.integer.value[1] & 0x3f;
        b = right - left;
        if (b < 0) {
-               v = 0x3f - left;
-               b = (-b) | 0x40;
+           v = 0x3f - left;
+           b = (-b) | 0x40;
        } else {
-               v = 0x3f - right;
+           v = 0x3f - right;
        }
        /* Do we really know this will always be called with IRQs on? */
        spin_lock_irq(&chip->reg_lock);
        old = cx_read(AUD_VOL_CTL);
        if (v != (old & 0x3f)) {
-               cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, (old & ~0x3f) | v);
-               changed = 1;
+           cx_write(AUD_VOL_CTL, (old & ~0x3f) | v);
+           changed = 1;
        }
-       if ((cx_read(AUD_BAL_CTL) & 0x7f) != b) {
-               cx_write(AUD_BAL_CTL, b);
-               changed = 1;
+       if (cx_read(AUD_BAL_CTL) != b) {
+           cx_write(AUD_BAL_CTL, b);
+           changed = 1;
        }
        spin_unlock_irq(&chip->reg_lock);
 
@@ -640,7 +618,7 @@ static const struct snd_kcontrol_new snd_cx88_volume = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
        .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
                  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
-       .name = "Analog-TV Volume",
+       .name = "Playback Volume",
        .info = snd_cx88_volume_info,
        .get = snd_cx88_volume_get,
        .put = snd_cx88_volume_put,
@@ -671,14 +649,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
        vol = cx_read(AUD_VOL_CTL);
        if (value->value.integer.value[0] != !(vol & bit)) {
                vol ^= bit;
-               cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, vol);
-               /* Pass mute onto any WM8775 */
-               if ((1<<6) == bit) {
-                       struct v4l2_control client_ctl;
-                       client_ctl.value = 0 != (vol & bit);
-                       client_ctl.id = V4L2_CID_AUDIO_MUTE;
-                       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-               }
+               cx_write(AUD_VOL_CTL, vol);
                ret = 1;
        }
        spin_unlock_irq(&chip->reg_lock);
@@ -687,7 +658,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
 
 static const struct snd_kcontrol_new snd_cx88_dac_switch = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Audio-Out Switch",
+       .name = "Playback Switch",
        .info = snd_ctl_boolean_mono_info,
        .get = snd_cx88_switch_get,
        .put = snd_cx88_switch_put,
@@ -696,49 +667,13 @@ static const struct snd_kcontrol_new snd_cx88_dac_switch = {
 
 static const struct snd_kcontrol_new snd_cx88_source_switch = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Analog-TV Switch",
+       .name = "Capture Switch",
        .info = snd_ctl_boolean_mono_info,
        .get = snd_cx88_switch_get,
        .put = snd_cx88_switch_put,
        .private_value = (1<<6),
 };
 
-static int snd_cx88_alc_get(struct snd_kcontrol *kcontrol,
-                              struct snd_ctl_elem_value *value)
-{
-       snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-       struct cx88_core *core = chip->core;
-       struct v4l2_control client_ctl;
-
-       client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-       call_hw(core, WM8775_GID, core, g_ctrl, &client_ctl);
-       value->value.integer.value[0] = client_ctl.value ? 1 : 0;
-
-       return 0;
-}
-
-static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol,
-                                      struct snd_ctl_elem_value *value)
-{
-       snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-       struct cx88_core *core = chip->core;
-       struct v4l2_control client_ctl;
-
-       client_ctl.value = 0 != value->value.integer.value[0];
-       client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-       return 0;
-}
-
-static struct snd_kcontrol_new snd_cx88_alc_switch = {
-       .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Line-In ALC Switch",
-       .info = snd_ctl_boolean_mono_info,
-       .get = snd_cx88_alc_get,
-       .put = snd_cx88_alc_put,
-};
-
 /****************************************************************************
                        Basic Flow for Sound Devices
  ****************************************************************************/
@@ -860,7 +795,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
 {
        struct snd_card  *card;
        snd_cx88_card_t  *chip;
-       struct v4l2_subdev *sd;
        int              err;
 
        if (devno >= SNDRV_CARDS)
@@ -896,15 +830,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
        if (err < 0)
                goto error;
 
-       /* If there's a wm8775 then add a Line-In ALC switch */
-       list_for_each_entry(sd, &chip->core->v4l2_dev.subdevs, list) {
-               if (WM8775_GID == sd->grp_id) {
-                       snd_ctl_add(card, snd_ctl_new1(&snd_cx88_alc_switch,
-                                                      chip));
-                       break;
-               }
-       }
-
        strcpy (card->driver, "CX88x");
        sprintf(card->shortname, "Conexant CX%x", pci->device);
        sprintf(card->longname, "%s at %#llx",
index 9b9e169cce90862ee92f7d80ab7851cdb3cf81cb..0ccc2afd72668e7d2b8384312b01cde4ebb0e86b 100644 (file)
@@ -1007,15 +1007,22 @@ static const struct cx88_board cx88_boards[] = {
                .radio_type     = UNSET,
                .tuner_addr     = ADDR_UNSET,
                .radio_addr     = ADDR_UNSET,
+               .audio_chip     = V4L2_IDENT_WM8775,
                .input          = {{
                        .type   = CX88_VMUX_DVB,
                        .vmux   = 0,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                },{
                        .type   = CX88_VMUX_COMPOSITE1,
                        .vmux   = 1,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                },{
                        .type   = CX88_VMUX_SVIDEO,
                        .vmux   = 2,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                }},
                .mpeg           = CX88_MPEG_DVB,
        },
index 62cea9549404bf29d3d58e004af015dcb132f779..d9249e5a04c9088041a67d9a1b997e26dae64761 100644 (file)
@@ -40,7 +40,6 @@
 #include "cx88.h"
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards");
 MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
@@ -977,7 +976,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
        const struct cx88_ctrl *c = NULL;
        u32 value,mask;
        int i;
-       struct v4l2_control client_ctl;
 
        for (i = 0; i < CX8800_CTLS; i++) {
                if (cx8800_ctls[i].v.id == ctl->id) {
@@ -991,27 +989,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
                ctl->value = c->v.minimum;
        if (ctl->value > c->v.maximum)
                ctl->value = c->v.maximum;
-
-       /* Pass changes onto any WM8775 */
-       client_ctl.id = ctl->id;
-       switch (ctl->id) {
-       case V4L2_CID_AUDIO_MUTE:
-               client_ctl.value = ctl->value;
-               break;
-       case V4L2_CID_AUDIO_VOLUME:
-               client_ctl.value = (ctl->value) ?
-                       (0x90 + ctl->value) << 8 : 0;
-               break;
-       case V4L2_CID_AUDIO_BALANCE:
-               client_ctl.value = ctl->value << 9;
-               break;
-       default:
-               client_ctl.id = 0;
-               break;
-       }
-       if (client_ctl.id)
-               call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
        mask=c->mask;
        switch (ctl->id) {
        case V4L2_CID_AUDIO_BALANCE:
@@ -1558,9 +1535,7 @@ static int radio_queryctrl (struct file *file, void *priv,
        if (c->id <  V4L2_CID_BASE ||
                c->id >= V4L2_CID_LASTP1)
                return -EINVAL;
-       if (c->id == V4L2_CID_AUDIO_MUTE ||
-               c->id == V4L2_CID_AUDIO_VOLUME ||
-               c->id == V4L2_CID_AUDIO_BALANCE) {
+       if (c->id == V4L2_CID_AUDIO_MUTE) {
                for (i = 0; i < CX8800_CTLS; i++) {
                        if (cx8800_ctls[i].v.id == c->id)
                                break;
index e8c732e7ae4f612dfc5004d7f56ba5bd9b9a8d53..c9981e77416a6f8292721b53333bd9cbba1da35c 100644 (file)
@@ -398,19 +398,17 @@ static inline struct cx88_core *to_core(struct v4l2_device *v4l2_dev)
        return container_of(v4l2_dev, struct cx88_core, v4l2_dev);
 }
 
-#define call_hw(core, grpid, o, f, args...) \
+#define call_all(core, o, f, args...)                          \
        do {                                                    \
                if (!core->i2c_rc) {                            \
                        if (core->gate_ctrl)                    \
                                core->gate_ctrl(core, 1);       \
-                       v4l2_device_call_all(&core->v4l2_dev, grpid, o, f, ##args); \
+                       v4l2_device_call_all(&core->v4l2_dev, 0, o, f, ##args); \
                        if (core->gate_ctrl)                    \
                                core->gate_ctrl(core, 0);       \
                }                                               \
        } while (0)
 
-#define call_all(core, o, f, args...) call_hw(core, 0, o, f, ##args)
-
 struct cx8800_dev;
 struct cx8802_dev;
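
With the wm8775 group routing reverted, call_all() broadcasts again: group id 0 in v4l2_device_call_all() means "match every subdevice". Illustrative use (hypothetical arguments):

	call_all(core, core, s_power, 1);	/* reaches every registered subdev */
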
 
index 908e3bc88303989c10f6ae8950286a820d2132e9..2c3007280032ea5472e45e9b1c75ee38aaa36b4c 100644 (file)
@@ -2377,7 +2377,7 @@ static const struct v4l2_file_operations radio_fops = {
        .owner         = THIS_MODULE,
        .open          = em28xx_v4l2_open,
        .release       = em28xx_v4l2_close,
-       .ioctl         = video_ioctl2,
+       .unlocked_ioctl = video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops radio_ioctl_ops = {
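
This is part of the V4L2 BKL removal: with .unlocked_ioctl the core no longer serializes ioctl handlers, so locking becomes the driver's job -- which is why the fimc hunks below add mutex_lock_interruptible() to their ioctl paths. Sketch (hypothetical names):

	static const struct v4l2_file_operations example_fops = {
		.owner		= THIS_MODULE,
		.open		= example_open,
		.release	= example_release,
		.unlocked_ioctl	= video_ioctl2,	/* no BKL taken by the core */
	};
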
index 072bd2d1cfad46f4c5a25ea33f789409c2848a5c..13565cba237d13e0a99922067ceb9edb9edd1e83 100644 (file)
@@ -807,8 +807,6 @@ static int mx2_camera_set_bus_param(struct soc_camera_device *icd,
 
        if (common_flags & SOCAM_PCLK_SAMPLE_RISING)
                csicr1 |= CSICR1_REDGE;
-       if (common_flags & SOCAM_PCLK_SAMPLE_FALLING)
-               csicr1 |= CSICR1_INV_PCLK;
        if (common_flags & SOCAM_VSYNC_ACTIVE_HIGH)
                csicr1 |= CSICR1_SOF_POL;
        if (common_flags & SOCAM_HSYNC_ACTIVE_HIGH)
index 1b93207c89e84efbbf6a4e3e8357a7af7f32f90d..2f500809f53d9b418dd2d3117ce44ef7706d0287 100644 (file)
@@ -522,6 +522,7 @@ static int fimc_cap_streamon(struct file *file, void *priv,
        INIT_LIST_HEAD(&fimc->vid_cap.active_buf_q);
        fimc->vid_cap.active_buf_cnt = 0;
        fimc->vid_cap.frame_count = 0;
+       fimc->vid_cap.buf_index = fimc_hw_get_frame_index(fimc);
 
        set_bit(ST_CAPT_PEND, &fimc->state);
        ret = videobuf_streamon(&fimc->vid_cap.vbq);
@@ -652,6 +653,50 @@ static int fimc_cap_s_ctrl(struct file *file, void *priv,
        return ret;
 }
 
+static int fimc_cap_cropcap(struct file *file, void *fh,
+                           struct v4l2_cropcap *cr)
+{
+       struct fimc_frame *f;
+       struct fimc_ctx *ctx = fh;
+       struct fimc_dev *fimc = ctx->fimc_dev;
+
+       if (cr->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+               return -EINVAL;
+
+       if (mutex_lock_interruptible(&fimc->lock))
+               return -ERESTARTSYS;
+
+       f = &ctx->s_frame;
+       cr->bounds.left         = 0;
+       cr->bounds.top          = 0;
+       cr->bounds.width        = f->o_width;
+       cr->bounds.height       = f->o_height;
+       cr->defrect             = cr->bounds;
+
+       mutex_unlock(&fimc->lock);
+       return 0;
+}
+
+static int fimc_cap_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
+{
+       struct fimc_frame *f;
+       struct fimc_ctx *ctx = file->private_data;
+       struct fimc_dev *fimc = ctx->fimc_dev;
+
+
+               return -ERESTARTSYS;
+
+       f = &ctx->s_frame;
+       cr->c.left      = f->offs_h;
+       cr->c.top       = f->offs_v;
+       cr->c.width     = f->width;
+       cr->c.height    = f->height;
+
+       mutex_unlock(&fimc->lock);
+       return 0;
+}
+
 static int fimc_cap_s_crop(struct file *file, void *fh,
                               struct v4l2_crop *cr)
 {
@@ -716,9 +761,9 @@ static const struct v4l2_ioctl_ops fimc_capture_ioctl_ops = {
        .vidioc_g_ctrl                  = fimc_vidioc_g_ctrl,
        .vidioc_s_ctrl                  = fimc_cap_s_ctrl,
 
-       .vidioc_g_crop                  = fimc_vidioc_g_crop,
+       .vidioc_g_crop                  = fimc_cap_g_crop,
        .vidioc_s_crop                  = fimc_cap_s_crop,
-       .vidioc_cropcap                 = fimc_vidioc_cropcap,
+       .vidioc_cropcap                 = fimc_cap_cropcap,
 
        .vidioc_enum_input              = fimc_cap_enum_input,
        .vidioc_s_input                 = fimc_cap_s_input,
@@ -785,7 +830,7 @@ int fimc_register_capture_device(struct fimc_dev *fimc)
        videobuf_queue_dma_contig_init(&vid_cap->vbq, &fimc_qops,
                vid_cap->v4l2_dev.dev, &fimc->irqlock,
                V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_FIELD_NONE,
-               sizeof(struct fimc_vid_buffer), (void *)ctx);
+               sizeof(struct fimc_vid_buffer), (void *)ctx, NULL);
 
        ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
        if (ret) {
index 2e7c547894b687d9fd37288e365ceae704dbcbfc..bb99f2d805d3347975e0ff32603bd2f7b8f44e20 100644 (file)
@@ -50,8 +50,8 @@ static struct fimc_fmt fimc_formats[] = {
                .planes_cnt = 1,
                .flags = FMT_FLAGS_M2M,
        }, {
-               .name = "XRGB-8-8-8-8, 24 bpp",
-               .fourcc = V4L2_PIX_FMT_RGB24,
+               .name = "XRGB-8-8-8-8, 32 bpp",
+               .fourcc = V4L2_PIX_FMT_RGB32,
                .depth = 32,
                .color  = S5P_FIMC_RGB888,
                .buff_cnt = 1,
@@ -983,6 +983,7 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
 {
        struct fimc_ctx *ctx = priv;
        struct v4l2_queryctrl *c;
+       int ret = -EINVAL;
 
        c = get_ctrl(qc->id);
        if (c) {
@@ -990,10 +991,14 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
                return 0;
        }
 
-       if (ctx->state & FIMC_CTX_CAP)
-               return v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd,
+       if (ctx->state & FIMC_CTX_CAP) {
+               if (mutex_lock_interruptible(&ctx->fimc_dev->lock))
+                       return -ERESTARTSYS;
+               ret = v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd,
                                        core, queryctrl, qc);
-       return -EINVAL;
+               mutex_unlock(&ctx->fimc_dev->lock);
+       }
+       return ret;
 }
 
 int fimc_vidioc_g_ctrl(struct file *file, void *priv,
@@ -1115,7 +1120,7 @@ static int fimc_m2m_s_ctrl(struct file *file, void *priv,
        return 0;
 }
 
-int fimc_vidioc_cropcap(struct file *file, void *fh,
+static int fimc_m2m_cropcap(struct file *file, void *fh,
                        struct v4l2_cropcap *cr)
 {
        struct fimc_frame *frame;
@@ -1139,7 +1144,7 @@ int fimc_vidioc_cropcap(struct file *file, void *fh,
        return 0;
 }
 
-int fimc_vidioc_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
+static int fimc_m2m_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
 {
        struct fimc_frame *frame;
        struct fimc_ctx *ctx = file->private_data;
@@ -1167,22 +1172,22 @@ int fimc_try_crop(struct fimc_ctx *ctx, struct v4l2_crop *cr)
        struct fimc_frame *f;
        u32 min_size, halign;
 
-       f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
-               &ctx->s_frame : &ctx->d_frame;
-
        if (cr->c.top < 0 || cr->c.left < 0) {
                v4l2_err(&fimc->m2m.v4l2_dev,
                        "doesn't support negative values for top & left\n");
                return -EINVAL;
        }
 
-       f = ctx_get_frame(ctx, cr->type);
-       if (IS_ERR(f))
-               return PTR_ERR(f);
+       if (cr->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+               f = (ctx->state & FIMC_CTX_CAP) ? &ctx->s_frame : &ctx->d_frame;
+       else if (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
+                ctx->state & FIMC_CTX_M2M)
+               f = &ctx->s_frame;
+       else
+               return -EINVAL;
 
-       min_size = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-               ? fimc->variant->min_inp_pixsize
-               : fimc->variant->min_out_pixsize;
+       min_size = (f == &ctx->s_frame) ?
+               fimc->variant->min_inp_pixsize : fimc->variant->min_out_pixsize;
 
        if (ctx->state & FIMC_CTX_M2M) {
                if (fimc->id == 1 && fimc->variant->pix_hoff)
@@ -1233,6 +1238,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
        f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
                &ctx->s_frame : &ctx->d_frame;
 
+       if (mutex_lock_interruptible(&fimc->lock))
+               return -ERESTARTSYS;
+
        spin_lock_irqsave(&ctx->slock, flags);
        if (~ctx->state & (FIMC_SRC_FMT | FIMC_DST_FMT)) {
                /* Check to see if scaling ratio is within supported range */
@@ -1241,9 +1249,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
                else
                        ret = fimc_check_scaler_ratio(&cr->c, &ctx->s_frame);
                if (ret) {
-                       spin_unlock_irqrestore(&ctx->slock, flags);
                        v4l2_err(&fimc->m2m.v4l2_dev, "Out of scaler range");
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto scr_unlock;
                }
        }
        ctx->state |= FIMC_PARAMS;
@@ -1253,7 +1261,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
        f->width  = cr->c.width;
        f->height = cr->c.height;
 
+scr_unlock:
        spin_unlock_irqrestore(&ctx->slock, flags);
+       mutex_unlock(&fimc->lock);
        return 0;
 }
 
@@ -1285,9 +1295,9 @@ static const struct v4l2_ioctl_ops fimc_m2m_ioctl_ops = {
        .vidioc_g_ctrl                  = fimc_vidioc_g_ctrl,
        .vidioc_s_ctrl                  = fimc_m2m_s_ctrl,
 
-       .vidioc_g_crop                  = fimc_vidioc_g_crop,
+       .vidioc_g_crop                  = fimc_m2m_g_crop,
        .vidioc_s_crop                  = fimc_m2m_s_crop,
-       .vidioc_cropcap                 = fimc_vidioc_cropcap
+       .vidioc_cropcap                 = fimc_m2m_cropcap
 
 };
 
@@ -1396,7 +1406,7 @@ static const struct v4l2_file_operations fimc_m2m_fops = {
        .open           = fimc_m2m_open,
        .release        = fimc_m2m_release,
        .poll           = fimc_m2m_poll,
-       .ioctl          = video_ioctl2,
+       .unlocked_ioctl = video_ioctl2,
        .mmap           = fimc_m2m_mmap,
 };
 
@@ -1736,6 +1746,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
        .pix_hoff        = 1,
        .has_inp_rot     = 1,
        .has_out_rot     = 1,
+       .has_cistatus2   = 1,
        .min_inp_pixsize = 16,
        .min_out_pixsize = 16,
        .hor_offs_align  = 1,
@@ -1745,6 +1756,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
 
 static struct samsung_fimc_variant fimc2_variant_s5pv310 = {
        .pix_hoff        = 1,
+       .has_cistatus2   = 1,
        .min_inp_pixsize = 16,
        .min_out_pixsize = 16,
        .hor_offs_align  = 1,
index 3e107851656017adfc1a2567956a93e28fd43cd5..4f047d35f8ad9333ce3b4768836685b3d5f2f411 100644 (file)
 
 /*#define DEBUG*/
 
+#include <linux/sched.h>
 #include <linux/types.h>
+#include <linux/videodev2.h>
 #include <media/videobuf-core.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/v4l2-mediabus.h>
 #include <media/s3c_fimc.h>
-#include <linux/videodev2.h>
+
 #include "regs-fimc.h"
 
 #define err(fmt, args...) \
@@ -369,6 +371,7 @@ struct fimc_pix_limit {
  * @pix_hoff: indicate whether horizontal offset is in pixels or in bytes
  * @has_inp_rot: set if has input rotator
  * @has_out_rot: set if has output rotator
+ * @has_cistatus2: 1 if CISTATUS2 register is present in this IP revision
  * @pix_limit: pixel size constraints for the scaler
  * @min_inp_pixsize: minimum input pixel size
  * @min_out_pixsize: minimum output pixel size
@@ -379,6 +382,7 @@ struct samsung_fimc_variant {
        unsigned int    pix_hoff:1;
        unsigned int    has_inp_rot:1;
        unsigned int    has_out_rot:1;
+       unsigned int    has_cistatus2:1;
        struct fimc_pix_limit *pix_limit;
        u16             min_inp_pixsize;
        u16             min_out_pixsize;
@@ -554,11 +558,19 @@ static inline struct fimc_frame *ctx_get_frame(struct fimc_ctx *ctx,
        return frame;
 }
 
+/* Return an index to the buffer actually being written. */
 static inline u32 fimc_hw_get_frame_index(struct fimc_dev *dev)
 {
-       u32 reg = readl(dev->regs + S5P_CISTATUS);
-       return (reg & S5P_CISTATUS_FRAMECNT_MASK) >>
-               S5P_CISTATUS_FRAMECNT_SHIFT;
+       u32 reg;
+
+       if (dev->variant->has_cistatus2) {
+               reg = readl(dev->regs + S5P_CISTATUS2) & 0x3F;
+               return reg > 0 ? --reg : reg;
+       } else {
+               reg = readl(dev->regs + S5P_CISTATUS);
+               return (reg & S5P_CISTATUS_FRAMECNT_MASK) >>
+                       S5P_CISTATUS_FRAMECNT_SHIFT;
+       }
 }
 
 /* -----------------------------------------------------*/
@@ -594,10 +606,6 @@ int fimc_vidioc_g_fmt(struct file *file, void *priv,
                      struct v4l2_format *f);
 int fimc_vidioc_try_fmt(struct file *file, void *priv,
                        struct v4l2_format *f);
-int fimc_vidioc_g_crop(struct file *file, void *fh,
-                      struct v4l2_crop *cr);
-int fimc_vidioc_cropcap(struct file *file, void *fh,
-                       struct v4l2_cropcap *cr);
 int fimc_vidioc_queryctrl(struct file *file, void *priv,
                          struct v4l2_queryctrl *qc);
 int fimc_vidioc_g_ctrl(struct file *file, void *priv,
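
The CISTATUS2 path reads a 1-based index of the buffer currently being written (0 apparently meaning none yet), hence the decrement to the driver's 0-based numbering; older IP revisions keep the frame-counter field in CISTATUS. An equivalent, side-effect-free form of the new branch:

	u32 reg = readl(dev->regs + S5P_CISTATUS2) & 0x3F;
	return reg > 0 ? reg - 1 : reg;	/* 1-based hw index -> 0-based */
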
index a57daedb5b5cf7099e273afb276759b4228fef42..57e33f84fcfa5fa4a4fd35d7eb9998e75ac1d003 100644 (file)
 #define S5P_CISTATUS_VVALID_A          (1 << 15)
 #define S5P_CISTATUS_VVALID_B          (1 << 14)
 
+/* Indexes to the last and the currently processed buffer. */
+#define S5P_CISTATUS2                  0x68
+
 /* Image capture control */
 #define S5P_CIIMGCPT                   0xc0
 #define S5P_CIIMGCPT_IMGCPTEN          (1 << 31)
index 5c209afb0ac8ddb434d5c6335c2661161e36aa5b..2486520582f2c576639a28bc231ccf847a472e4b 100644 (file)
@@ -1980,7 +1980,7 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
                 * we complete the completion.
                 */
 
-               if (!csi2->driver || !csi2->driver->owner) {
+               if (!csi2->driver) {
                        complete(&wait.completion);
                        /* Either too late, or probing failed */
                        bus_unregister_notifier(&platform_bus_type, &wait.notifier);
index 335120c2021bc2cdbf5de98b59f58cc001144ad5..052bd6dfa5a787033d445401ddb114bb40796732 100644 (file)
@@ -405,13 +405,13 @@ static int soc_camera_open(struct file *file)
                ret = soc_camera_set_fmt(icd, &f);
                if (ret < 0)
                        goto esfmt;
+
+               ici->ops->init_videobuf(&icd->vb_vidq, icd);
        }
 
        file->private_data = icd;
        dev_dbg(&icd->dev, "camera device open\n");
 
-       ici->ops->init_videobuf(&icd->vb_vidq, icd);
-
        mutex_unlock(&icd->video_lock);
 
        return 0;
index 135525649086401779ce84cfbb90d87b4c5a7131..fe8ef6419f831f36d52347ff275d8c96d4dc8c5f 100644 (file)
@@ -35,7 +35,6 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-chip-ident.h>
 #include <media/v4l2-ctrls.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("wm8775 driver");
 MODULE_AUTHOR("Ulf Eklund, Hans Verkuil");
@@ -51,16 +50,10 @@ enum {
        TOT_REGS
 };
 
-#define ALC_HOLD 0x85 /* R17: use zero cross detection, ALC hold time 42.6 ms */
-#define ALC_EN 0x100  /* R17: ALC enable */
-
 struct wm8775_state {
        struct v4l2_subdev sd;
        struct v4l2_ctrl_handler hdl;
        struct v4l2_ctrl *mute;
-       struct v4l2_ctrl *vol;
-       struct v4l2_ctrl *bal;
-       struct v4l2_ctrl *loud;
        u8 input;               /* Last selected input (0-0xf) */
 };
 
@@ -92,30 +85,6 @@ static int wm8775_write(struct v4l2_subdev *sd, int reg, u16 val)
        return -1;
 }
 
-static void wm8775_set_audio(struct v4l2_subdev *sd, int quietly)
-{
-       struct wm8775_state *state = to_state(sd);
-       u8 vol_l, vol_r;
-       int muted = 0 != state->mute->val;
-       u16 volume = (u16)state->vol->val;
-       u16 balance = (u16)state->bal->val;
-
-       /* normalize ( 65535 to 0 -> 255 to 0 (+24dB to -103dB) ) */
-       vol_l = (min(65536 - balance, 32768) * volume) >> 23;
-       vol_r = (min(balance, (u16)32768) * volume) >> 23;
-
-       /* Mute */
-       if (muted || quietly)
-               wm8775_write(sd, R21, 0x0c0 | state->input);
-
-       wm8775_write(sd, R14, vol_l | 0x100); /* 0x100= Left channel ADC zero cross enable */
-       wm8775_write(sd, R15, vol_r | 0x100); /* 0x100= Right channel ADC zero cross enable */
-
-       /* Un-mute */
-       if (!muted)
-               wm8775_write(sd, R21, state->input);
-}
-
 static int wm8775_s_routing(struct v4l2_subdev *sd,
                            u32 input, u32 output, u32 config)
 {
@@ -133,26 +102,25 @@ static int wm8775_s_routing(struct v4l2_subdev *sd,
        state->input = input;
        if (!v4l2_ctrl_g_ctrl(state->mute))
                return 0;
-       if (!v4l2_ctrl_g_ctrl(state->vol))
-               return 0;
-       if (!v4l2_ctrl_g_ctrl(state->bal))
-               return 0;
-       wm8775_set_audio(sd, 1);
+       wm8775_write(sd, R21, 0x0c0);
+       wm8775_write(sd, R14, 0x1d4);
+       wm8775_write(sd, R15, 0x1d4);
+       wm8775_write(sd, R21, 0x100 + state->input);
        return 0;
 }
 
 static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl)
 {
        struct v4l2_subdev *sd = to_sd(ctrl);
+       struct wm8775_state *state = to_state(sd);
 
        switch (ctrl->id) {
        case V4L2_CID_AUDIO_MUTE:
-       case V4L2_CID_AUDIO_VOLUME:
-       case V4L2_CID_AUDIO_BALANCE:
-               wm8775_set_audio(sd, 0);
-               return 0;
-       case V4L2_CID_AUDIO_LOUDNESS:
-               wm8775_write(sd, R17, (ctrl->val ? ALC_EN : 0) | ALC_HOLD);
+               wm8775_write(sd, R21, 0x0c0);
+               wm8775_write(sd, R14, 0x1d4);
+               wm8775_write(sd, R15, 0x1d4);
+               if (!ctrl->val)
+                       wm8775_write(sd, R21, 0x100 + state->input);
                return 0;
        }
        return -EINVAL;
@@ -176,7 +144,16 @@ static int wm8775_log_status(struct v4l2_subdev *sd)
 
 static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq)
 {
-       wm8775_set_audio(sd, 0);
+       struct wm8775_state *state = to_state(sd);
+
+       /* If I remove this, then it can happen that I have no
+          sound the first time I tune from static to a valid channel.
+          It's difficult to reproduce and is almost certainly related
+          to the zero cross detect circuit. */
+       wm8775_write(sd, R21, 0x0c0);
+       wm8775_write(sd, R14, 0x1d4);
+       wm8775_write(sd, R15, 0x1d4);
+       wm8775_write(sd, R21, 0x100 + state->input);
        return 0;
 }
 
@@ -226,7 +203,6 @@ static int wm8775_probe(struct i2c_client *client,
 {
        struct wm8775_state *state;
        struct v4l2_subdev *sd;
-       int err;
 
        /* Check if the adapter supports the needed features */
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
@@ -240,21 +216,15 @@ static int wm8775_probe(struct i2c_client *client,
                return -ENOMEM;
        sd = &state->sd;
        v4l2_i2c_subdev_init(sd, client, &wm8775_ops);
-       sd->grp_id = WM8775_GID; /* subdev group id */
        state->input = 2;
 
-       v4l2_ctrl_handler_init(&state->hdl, 4);
+       v4l2_ctrl_handler_init(&state->hdl, 1);
        state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
                        V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0);
-       state->vol = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_VOLUME, 0, 65535, (65535+99)/100, 0xCF00); /* 0dB*/
-       state->bal = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_BALANCE, 0, 65535, (65535+99)/100, 32768);
-       state->loud = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_LOUDNESS, 0, 1, 1, 1);
        sd->ctrl_handler = &state->hdl;
-       err = state->hdl.error;
-       if (err) {
+       if (state->hdl.error) {
+               int err = state->hdl.error;
+
                v4l2_ctrl_handler_free(&state->hdl);
                kfree(state);
                return err;
@@ -266,25 +236,29 @@ static int wm8775_probe(struct i2c_client *client,
        wm8775_write(sd, R23, 0x000);
        /* Disable zero cross detect timeout */
        wm8775_write(sd, R7, 0x000);
-       /* HPF enable, I2S mode, 24-bit */
-       wm8775_write(sd, R11, 0x022);
+       /* Left justified, 24-bit mode */
+       wm8775_write(sd, R11, 0x021);
        /* Master mode, clock ratio 256fs */
        wm8775_write(sd, R12, 0x102);
        /* Powered up */
        wm8775_write(sd, R13, 0x000);
-       /* ALC stereo, ALC target level -5dB FS, ALC max gain +8dB */
-       wm8775_write(sd, R16, 0x1bb);
-       /* Set ALC mode and hold time */
-       wm8775_write(sd, R17, (state->loud->val ? ALC_EN : 0) | ALC_HOLD);
+       /* ADC gain +2.5dB, enable zero cross */
+       wm8775_write(sd, R14, 0x1d4);
+       /* ADC gain +2.5dB, enable zero cross */
+       wm8775_write(sd, R15, 0x1d4);
+       /* ALC stereo, ALC target level -1dB FS, ALC max gain +8dB */
+       wm8775_write(sd, R16, 0x1bf);
+       /* Enable gain control, use zero cross detection,
+          ALC hold time 42.6 ms */
+       wm8775_write(sd, R17, 0x185);
        /* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */
        wm8775_write(sd, R18, 0x0a2);
        /* Enable noise gate, threshold -72dBfs */
        wm8775_write(sd, R19, 0x005);
-       /* Transient window 4ms, ALC min gain -5dB  */
-       wm8775_write(sd, R20, 0x0fb);
-
-       wm8775_set_audio(sd, 1);      /* set volume/mute/mux */
-
+       /* Transient window 4ms, lower PGA gain limit -1dB */
+       wm8775_write(sd, R20, 0x07a);
+       /* LRBOTH = 1, use input 2. */
+       wm8775_write(sd, R21, 0x102);
        return 0;
 }
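
The revert reintroduces raw register values in place of the deleted helpers. Per the in-line comments, the recurring 0x1d4 decomposes as zero-cross enable plus a +2.5 dB ADC gain code, and the R21 sequence mutes (0x0c0), reprograms gain, then un-mutes with LRBOTH set (0x100 + input). Hypothetical names, for illustration only:

	#define WM8775_ZC_ENABLE	0x100	/* update gain on zero cross */
	#define WM8775_GAIN_P2_5DB	0x0d4	/* ADC gain code for +2.5 dB */
	/* R14/R15 value: WM8775_ZC_ENABLE | WM8775_GAIN_P2_5DB == 0x1d4 */
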
 
index dbe1c93c1af3eeac287cc795f16307aa44f86ccc..d9640a623ff483446822e4370127c376d3c631c5 100644 (file)
@@ -303,7 +303,7 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
                        continue;
 
                do {
-                       int bit = __ffs(status);
+                       int bit = __ffs(value);
                        int line = i * 8 + bit;
 
                        handle_nested_irq(ab8500->irq_base + line);
index 7d2563fc15c6096ba30a37f15eb4c95c3cfae68a..76cadcf3b1fee2ccf996d318c01137c53ba852a7 100644 (file)
@@ -1455,7 +1455,11 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
                dev_err(wm831x->dev, "Failed to read parent ID: %d\n", ret);
                goto err;
        }
-       if (ret != 0x6204) {
+       switch (ret) {
+       case 0x6204:
+       case 0x6246:
+               break;
+       default:
                dev_err(wm831x->dev, "Device is not a WM831x: ID %x\n", ret);
                ret = -EINVAL;
                goto err;
@@ -1620,7 +1624,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
        case WM8325:
                ret = mfd_add_devices(wm831x->dev, -1,
                                      wm8320_devs, ARRAY_SIZE(wm8320_devs),
-                                     NULL, 0);
+                                     NULL, wm831x->irq_base);
                break;
 
        default:
index 31ae07a36576678fb27dbd294623f6f8ac21cc9d..57dcf8fa774a7302be1fbb6f0104d6320c6287c8 100644 (file)
@@ -1773,6 +1773,7 @@ int mmc_pm_notify(struct notifier_block *notify_block,
 
        case PM_POST_SUSPEND:
        case PM_POST_HIBERNATION:
+       case PM_POST_RESTORE:
 
                spin_lock_irqsave(&host->lock, flags);
                host->rescan_disable = 0;
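Adding PM_POST_RESTORE means rescanning is also re-enabled when the kernel returns to normal operation after a (possibly failed) restore from a hibernation image; previously the host stayed frozen in that path. The shape of such a notifier, sketched with the host lookup and locking elided:

    static int example_pm_notify(struct notifier_block *nb,
                                 unsigned long mode, void *unused)
    {
            switch (mode) {
            case PM_HIBERNATION_PREPARE:
            case PM_SUSPEND_PREPARE:
                    /* quiesce: disable card detect/rescan */
                    break;
            case PM_POST_SUSPEND:
            case PM_POST_HIBERNATION:
            case PM_POST_RESTORE:
                    /* back to normal operation: re-enable rescan */
                    break;
            }
            return 0;
    }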
index 591ab540b407ad9805c3aa384795b2c59a7d8c69..d3e6a962f42343ac4f30fdd7dc7da8f13ff8a95a 100644 (file)
@@ -69,6 +69,7 @@
 #include <linux/highmem.h>
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdio.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -493,10 +494,14 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
                else if (data->flags & MMC_DATA_WRITE)
                        cmdr |= AT91_MCI_TRCMD_START;
 
-               if (data->flags & MMC_DATA_STREAM)
-                       cmdr |= AT91_MCI_TRTYP_STREAM;
-               if (data->blocks > 1)
-                       cmdr |= AT91_MCI_TRTYP_MULTIPLE;
+               if (cmd->opcode == SD_IO_RW_EXTENDED) {
+                       cmdr |= AT91_MCI_TRTYP_SDIO_BLOCK;
+               } else {
+                       if (data->flags & MMC_DATA_STREAM)
+                               cmdr |= AT91_MCI_TRTYP_STREAM;
+                       if (data->blocks > 1)
+                               cmdr |= AT91_MCI_TRTYP_MULTIPLE;
+               }
        }
        else {
                block_length = 0;
index 301351a5d83853f877953a1ecd10553ecc8f5c22..ad2a7a032cdf02478a80cd87055abce9648e31ca 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/stat.h>
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdio.h>
 
 #include <mach/atmel-mci.h>
 #include <linux/atmel-mci.h>
@@ -532,12 +533,17 @@ static u32 atmci_prepare_command(struct mmc_host *mmc,
        data = cmd->data;
        if (data) {
                cmdr |= MCI_CMDR_START_XFER;
-               if (data->flags & MMC_DATA_STREAM)
-                       cmdr |= MCI_CMDR_STREAM;
-               else if (data->blocks > 1)
-                       cmdr |= MCI_CMDR_MULTI_BLOCK;
-               else
-                       cmdr |= MCI_CMDR_BLOCK;
+
+               if (cmd->opcode == SD_IO_RW_EXTENDED) {
+                       cmdr |= MCI_CMDR_SDIO_BLOCK;
+               } else {
+                       if (data->flags & MMC_DATA_STREAM)
+                               cmdr |= MCI_CMDR_STREAM;
+                       else if (data->blocks > 1)
+                               cmdr |= MCI_CMDR_MULTI_BLOCK;
+                       else
+                               cmdr |= MCI_CMDR_BLOCK;
+               }
 
                if (data->flags & MMC_DATA_READ)
                        cmdr |= MCI_CMDR_TRDIR_READ;
index 09b099bfab2b1b75c5dc7e3f0b323393e0124f8a..bdf11d89a4997887fc1c1145b393ef91ad5785a4 100644 (file)
@@ -702,6 +702,7 @@ static int __devinit atl1c_sw_init(struct atl1c_adapter *adapter)
 
 
        adapter->wol = 0;
+       device_set_wakeup_enable(&pdev->dev, false);
        adapter->link_speed = SPEED_0;
        adapter->link_duplex = FULL_DUPLEX;
        adapter->num_rx_queues = AT_DEF_RECEIVE_QUEUE;
@@ -2444,8 +2445,9 @@ static int atl1c_close(struct net_device *netdev)
        return 0;
 }
 
-static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
+static int atl1c_suspend(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct atl1c_adapter *adapter = netdev_priv(netdev);
        struct atl1c_hw *hw = &adapter->hw;
@@ -2454,7 +2456,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
        u32 wol_ctrl_data = 0;
        u16 mii_intr_status_data = 0;
        u32 wufc = adapter->wol;
-       int retval = 0;
 
        atl1c_disable_l0s_l1(hw);
        if (netif_running(netdev)) {
@@ -2462,9 +2463,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                atl1c_down(adapter);
        }
        netif_device_detach(netdev);
-       retval = pci_save_state(pdev);
-       if (retval)
-               return retval;
 
        if (wufc)
                if (atl1c_phy_power_saving(hw) != 0)
@@ -2525,12 +2523,8 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                AT_WRITE_REG(hw, REG_WOL_CTRL, wol_ctrl_data);
                AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
 
-               /* pcie patch */
-               device_set_wakeup_enable(&pdev->dev, 1);
-
                AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_DEFAULT |
                        GPHY_CTRL_EXT_RESET);
-               pci_prepare_to_sleep(pdev);
        } else {
                AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_POWER_SAVING);
                master_ctrl_data |= MASTER_CTRL_CLK_SEL_DIS;
@@ -2540,25 +2534,17 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
                AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
                hw->phy_configured = false; /* re-init PHY when resume */
-               pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
        }
 
-       pci_disable_device(pdev);
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
        return 0;
 }
 
-static int atl1c_resume(struct pci_dev *pdev)
+static int atl1c_resume(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct atl1c_adapter *adapter = netdev_priv(netdev);
 
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-       pci_enable_wake(pdev, PCI_D3hot, 0);
-       pci_enable_wake(pdev, PCI_D3cold, 0);
-
        AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
        atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE |
                        ATL1C_PCIE_PHY_RESET);
@@ -2582,7 +2568,12 @@ static int atl1c_resume(struct pci_dev *pdev)
 
 static void atl1c_shutdown(struct pci_dev *pdev)
 {
-       atl1c_suspend(pdev, PMSG_SUSPEND);
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct atl1c_adapter *adapter = netdev_priv(netdev);
+
+       atl1c_suspend(&pdev->dev);
+       pci_wake_from_d3(pdev, adapter->wol);
+       pci_set_power_state(pdev, PCI_D3hot);
 }
 
 static const struct net_device_ops atl1c_netdev_ops = {
@@ -2886,16 +2877,16 @@ static struct pci_error_handlers atl1c_err_handler = {
        .resume = atl1c_io_resume,
 };
 
+static SIMPLE_DEV_PM_OPS(atl1c_pm_ops, atl1c_suspend, atl1c_resume);
+
 static struct pci_driver atl1c_driver = {
        .name     = atl1c_driver_name,
        .id_table = atl1c_pci_tbl,
        .probe    = atl1c_probe,
        .remove   = __devexit_p(atl1c_remove),
-       /* Power Managment Hooks */
-       .suspend  = atl1c_suspend,
-       .resume   = atl1c_resume,
        .shutdown = atl1c_shutdown,
-       .err_handler = &atl1c_err_handler
+       .err_handler = &atl1c_err_handler,
+       .driver.pm = &atl1c_pm_ops,
 };
 
 /*
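The driver-model conversion above is also why the pci_save_state(), pci_set_power_state() and pci_enable_wake() calls disappear from atl1c_suspend()/atl1c_resume(): with dev_pm_ops, the PCI core performs the config-space save/restore and power-state transitions around the driver callbacks. The general pattern, sketched with illustrative names:

    static int foo_suspend(struct device *dev)
    {
            /* device-specific quiesce only; the PCI core saves state */
            return 0;
    }

    static int foo_resume(struct device *dev)
    {
            /* device-specific re-init only; the PCI core restores state */
            return 0;
    }

    static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

    /* hooked up via .driver.pm = &foo_pm_ops instead of .suspend/.resume */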
index 53363108994ee93bed670940b0eb91170a389456..3acf5123a6efa8c0e21d76b7deb487a76d21ea3e 100644 (file)
@@ -3504,6 +3504,8 @@ static int atl1_set_ringparam(struct net_device *netdev,
        struct atl1_rfd_ring rfd_old, rfd_new;
        struct atl1_rrd_ring rrd_old, rrd_new;
        struct atl1_ring_header rhdr_old, rhdr_new;
+       struct atl1_smb smb;
+       struct atl1_cmb cmb;
        int err;
 
        tpd_old = adapter->tpd_ring;
@@ -3544,11 +3546,19 @@ static int atl1_set_ringparam(struct net_device *netdev,
                adapter->rrd_ring = rrd_old;
                adapter->tpd_ring = tpd_old;
                adapter->ring_header = rhdr_old;
+               /*
+                * Save SMB and CMB, since atl1_free_ring_resources
+                * will clear them.
+                */
+               smb = adapter->smb;
+               cmb = adapter->cmb;
                atl1_free_ring_resources(adapter);
                adapter->rfd_ring = rfd_new;
                adapter->rrd_ring = rrd_new;
                adapter->tpd_ring = tpd_new;
                adapter->ring_header = rhdr_new;
+               adapter->smb = smb;
+               adapter->cmb = cmb;
 
                err = atl1_up(adapter);
                if (err)
index 4594a28b1f665ef7923aa462739b96e39182ac5e..d64313b7090e2217038130bb314cedde2bb7c152 100644 (file)
@@ -234,7 +234,7 @@ struct be_adapter {
        u8 __iomem *db;         /* Door Bell */
        u8 __iomem *pcicfg;     /* PCI config space */
 
-       spinlock_t mbox_lock;   /* For serializing mbox cmds to BE card */
+       struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
        struct be_dma_mem mbox_mem;
        /* Mbox mem is adjusted to align to 16 bytes. The allocated addr
         * is stored for freeing purpose */
index e4465d222a7d019654c27c5f4d3db89067f89cf8..1c8c79c9d214e45feb59dbf20b439be1d369702a 100644 (file)
@@ -462,7 +462,8 @@ int be_cmd_fw_init(struct be_adapter *adapter)
        u8 *wrb;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = (u8 *)wrb_from_mbox(adapter);
        *wrb++ = 0xFF;
@@ -476,7 +477,7 @@ int be_cmd_fw_init(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -491,7 +492,8 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = (u8 *)wrb_from_mbox(adapter);
        *wrb++ = 0xFF;
@@ -505,7 +507,7 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 int be_cmd_eq_create(struct be_adapter *adapter,
@@ -516,7 +518,8 @@ int be_cmd_eq_create(struct be_adapter *adapter,
        struct be_dma_mem *q_mem = &eq->dma_mem;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -546,7 +549,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
                eq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -558,7 +561,8 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
        struct be_cmd_req_mac_query *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -583,7 +587,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
                memcpy(mac_addr, resp->mac.addr, ETH_ALEN);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -667,7 +671,8 @@ int be_cmd_cq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -701,7 +706,7 @@ int be_cmd_cq_create(struct be_adapter *adapter,
                cq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -724,7 +729,8 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -754,7 +760,7 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
                mccq->id = le16_to_cpu(resp->id);
                mccq->created = true;
        }
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -769,7 +775,8 @@ int be_cmd_txq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -801,7 +808,7 @@ int be_cmd_txq_create(struct be_adapter *adapter,
                txq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -816,7 +823,8 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
        struct be_dma_mem *q_mem = &rxq->dma_mem;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -843,7 +851,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
                *rss_id = resp->rss_id;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -862,7 +870,8 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -899,7 +908,7 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -915,7 +924,8 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
        struct be_cmd_req_if_create *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -941,7 +951,7 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
                        *pmac_id = le32_to_cpu(resp->pmac_id);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -955,7 +965,8 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -970,7 +981,7 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -1060,7 +1071,8 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
        struct be_cmd_req_get_fw_version *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1077,7 +1089,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
                strncpy(fw_ver, resp->firmware_version_string, FW_VER_LEN);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1322,7 +1334,8 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
        struct be_cmd_req_query_fw_cfg *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1341,7 +1354,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
                *caps = le32_to_cpu(resp->function_caps);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1352,7 +1365,8 @@ int be_cmd_reset_function(struct be_adapter *adapter)
        struct be_cmd_req_hdr *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1365,7 +1379,7 @@ int be_cmd_reset_function(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1376,7 +1390,8 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
        u32 myhash[10];
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1396,7 +1411,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
index 93354eee2cfd1e679929054c9cc28fe7b35dea51..fd251b59b7f96b4d58ca0f7b22e91e22485d7005 100644 (file)
@@ -2677,7 +2677,7 @@ static int be_ctrl_init(struct be_adapter *adapter)
        }
        memset(mc_cmd_mem->va, 0, mc_cmd_mem->size);
 
-       spin_lock_init(&adapter->mbox_lock);
+       mutex_init(&adapter->mbox_lock);
        spin_lock_init(&adapter->mcc_lock);
        spin_lock_init(&adapter->mcc_cq_lock);
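The spinlock-to-mutex switch above, applied to every mailbox command in be_cmds.c, is needed because be_mbox_notify_wait() can sleep; taking the mutex interruptibly additionally lets a signal abort a command stuck behind unresponsive firmware. The recurring pattern, sketched (returning -EINTR instead of the bare -1, which aliases -EPERM, would arguably be clearer):

    static int example_mbox_cmd(struct be_adapter *adapter)
    {
            int status;

            if (mutex_lock_interruptible(&adapter->mbox_lock))
                    return -1;      /* interrupted before the command ran */

            status = be_mbox_notify_wait(adapter);  /* may sleep */

            mutex_unlock(&adapter->mbox_lock);
            return status;
    }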
 
index 121b073a6c3fae1985c164071f0a8f285dba0c14..84fbd4ebd778d472a9cf115cfc4faaf72e92939e 100644 (file)
@@ -88,7 +88,12 @@ static void bond_na_send(struct net_device *slave_dev,
        }
 
        if (vlan_id) {
-               skb = vlan_put_tag(skb, vlan_id);
+               /* The Ethernet header is not present yet, so it is
+                * too early to insert a VLAN tag.  Force use of an
+                * out-of-line tag here and let dev_hard_start_xmit()
+                * insert it if the slave hardware can't.
+                */
+               skb = __vlan_hwaccel_put_tag(skb, vlan_id);
                if (!skb) {
                        pr_err("failed to insert VLAN tag\n");
                        return;
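The distinction the new comment relies on: vlan_put_tag() rewrites the packet data, so it needs the Ethernet header already in place, while __vlan_hwaccel_put_tag() only records the tag out of line and leaves insertion to the transmit path. Roughly the 2.6.37-era inline, for reference:

    static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
                                                         u16 vlan_tci)
    {
            skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;    /* no data rewrite */
            return skb;
    }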
index d0ea760ce419f0ae8040bf880cd80013c651a93a..3b16c34ed86e93ddfacd0338ef42f687ab70fce7 100644 (file)
@@ -418,36 +418,11 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
  * @bond: bond device that got this skb for tx.
  * @skb: hw accel VLAN tagged skb to transmit
  * @slave_dev: slave that is supposed to xmit this skbuff
- *
- * When the bond gets an skb to transmit that is
- * already hardware accelerated VLAN tagged, and it
- * needs to relay this skb to a slave that is not
- * hw accel capable, the skb needs to be "unaccelerated",
- * i.e. strip the hwaccel tag and re-insert it as part
- * of the payload.
  */
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
                        struct net_device *slave_dev)
 {
-       unsigned short uninitialized_var(vlan_id);
-
-       /* Test vlan_list not vlgrp to catch and handle 802.1p tags */
-       if (!list_empty(&bond->vlan_list) &&
-           !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
-           vlan_get_tag(skb, &vlan_id) == 0) {
-               skb->dev = slave_dev;
-               skb = vlan_put_tag(skb, vlan_id);
-               if (!skb) {
-                       /* vlan_put_tag() frees the skb in case of error,
-                        * so return success here so the calling functions
-                        * won't attempt to free it again.
-                        */
-                       return 0;
-               }
-       } else {
-               skb->dev = slave_dev;
-       }
-
+       skb->dev = slave_dev;
        skb->priority = 1;
 #ifdef CONFIG_NET_POLL_CONTROLLER
        if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
@@ -1203,11 +1178,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                                bond_do_fail_over_mac(bond, new_active,
                                                      old_active);
 
-                       bond->send_grat_arp = bond->params.num_grat_arp;
-                       bond_send_gratuitous_arp(bond);
+                       if (netif_running(bond->dev)) {
+                               bond->send_grat_arp = bond->params.num_grat_arp;
+                               bond_send_gratuitous_arp(bond);
 
-                       bond->send_unsol_na = bond->params.num_unsol_na;
-                       bond_send_unsolicited_na(bond);
+                               bond->send_unsol_na = bond->params.num_unsol_na;
+                               bond_send_unsolicited_na(bond);
+                       }
 
                        write_unlock_bh(&bond->curr_slave_lock);
                        read_unlock(&bond->lock);
@@ -1221,8 +1198,9 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 
        /* resend IGMP joins since active slave has changed or
         * all were sent on curr_active_slave */
-       if ((USES_PRIMARY(bond->params.mode) && new_active) ||
-           bond->params.mode == BOND_MODE_ROUNDROBIN) {
+       if (((USES_PRIMARY(bond->params.mode) && new_active) ||
+            bond->params.mode == BOND_MODE_ROUNDROBIN) &&
+           netif_running(bond->dev)) {
                bond->igmp_retrans = bond->params.resend_igmp;
                queue_delayed_work(bond->wq, &bond->mcast_work, 0);
        }
index c2f081352a037894d6e8238ccb70c39e796c4a69..4feeb2d650a4f478bc6ab4f3c70db3f3563c622d 100644 (file)
@@ -269,11 +269,11 @@ static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct n
 
        bond_for_each_slave(bond, slave, i) {
                if (slave->dev == slave_dev) {
-                       break;
+                       return slave;
                }
        }
 
-       return slave;
+       return NULL;
 }
 
 static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
index 92bac19ad60ab0be5383b7566f48cc5dbbf2ab8e..6dff32196c92bcb65d10bda3c54cbb0048dee416 100644 (file)
@@ -940,7 +940,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
                                           &udev->l2_ring_map,
                                           GFP_KERNEL | __GFP_COMP);
        if (!udev->l2_ring)
-               return -ENOMEM;
+               goto err_udev;
 
        udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
        udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size);
@@ -948,7 +948,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
                                          &udev->l2_buf_map,
                                          GFP_KERNEL | __GFP_COMP);
        if (!udev->l2_buf)
-               return -ENOMEM;
+               goto err_dma;
 
        write_lock(&cnic_dev_lock);
        list_add(&udev->list, &cnic_udev_list);
@@ -959,6 +959,12 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
        cp->udev = udev;
 
        return 0;
+ err_dma:
+       dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size,
+                         udev->l2_ring, udev->l2_ring_map);
+ err_udev:
+       kfree(udev);
+       return -ENOMEM;
 }
 
 static int cnic_init_uio(struct cnic_dev *dev)
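The cnic fix converts two early returns, which leaked udev and the first DMA ring, into the standard kernel unwind idiom: each failure point jumps to a label that releases everything allocated before it, in reverse order. In general form (alloc_a/free_a and friends are hypothetical):

    static int example_alloc(void)
    {
            void *a, *b, *c;

            a = alloc_a();
            if (!a)
                    return -ENOMEM;
            b = alloc_b();
            if (!b)
                    goto err_free_a;        /* undo only what succeeded */
            c = alloc_c();
            if (!c)
                    goto err_free_b;
            return 0;

    err_free_b:
            free_b(b);
    err_free_a:
            free_a(a);
            return -ENOMEM;
    }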
index 1f37ee6b2a2626282fd5a772cc21f821321b9379..d6cf502906cfeeafa41b1072a9aadf8a97a5b475 100644 (file)
@@ -263,6 +263,13 @@ static void ehea_get_ethtool_stats(struct net_device *dev,
 
 static int ehea_set_flags(struct net_device *dev, u32 data)
 {
+       /* Avoid changing the VLAN flags */
+       if ((data & (ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN)) !=
+           (ethtool_op_get_flags(dev) & (ETH_FLAG_RXVLAN |
+                                         ETH_FLAG_TXVLAN))) {
+               return -EINVAL;
+       }
+
        return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO
                                        | ETH_FLAG_TXVLAN
                                        | ETH_FLAG_RXVLAN);
index aa56963ad55820e2a763d64c102a36ae1b35dabf..c353bf3113cc335088e4322703a91cd5add4d60d 100644 (file)
@@ -935,7 +935,7 @@ static void epic_init_ring(struct net_device *dev)
 
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz + 2);
                ep->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1233,7 +1233,7 @@ static int epic_rx(struct net_device *dev, int budget)
                entry = ep->dirty_rx % RX_RING_SIZE;
                if (ep->rx_skbuff[entry] == NULL) {
                        struct sk_buff *skb;
-                       skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz);
+                       skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz + 2);
                        if (skb == NULL)
                                break;
                        skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
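The recurring "+ 2" in this and the receive-ring patches below (hamachi, sundance, yellowfin) pairs with the skb_reserve(skb, 2) that follows each allocation: the 14-byte Ethernet header would otherwise leave the IP header on a 2-byte boundary, and without enlarging the allocation the reserve quietly shrinks the usable buffer below rx_buf_sz. The idiom, isolated:

    skb = dev_alloc_skb(rx_buf_sz + 2);     /* room for frame plus padding */
    if (skb)
            skb_reserve(skb, 2);    /* IP header lands at offset 16 */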
index 9a6485892b3d48dfbcf783a27b670be83704e803..80d25ed533447735d55804e152bb8d581fc911ef 100644 (file)
@@ -1202,7 +1202,7 @@ static void hamachi_init_ring(struct net_device *dev)
        }
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
                hmp->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1669,7 +1669,7 @@ static int hamachi_rx(struct net_device *dev)
                entry = hmp->dirty_rx % RX_RING_SIZE;
                desc = &(hmp->rx_ring[entry]);
                if (hmp->rx_skbuff[entry] == NULL) {
-                       struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz);
+                       struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
 
                        hmp->rx_skbuff[entry] = skb;
                        if (skb == NULL)
index 8a4d19e5de064bd3fbbb11a2d2e916a28cf593b0..f1047dd8a526715857929e8ca86a47d846e5a15e 100644 (file)
@@ -690,6 +690,7 @@ static void block_output(struct net_device *dev, int count,
 static struct pcmcia_device_id axnet_ids[] = {
        PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x016c, 0x0081),
        PCMCIA_DEVICE_MANF_CARD(0x018a, 0x0301),
+       PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0301),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0303),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0309),
index d05c44692f08306f4dc04dbbb30098c46fd8e362..2c158910f7ea8f1784f629df23bfee2a21cc41c9 100644 (file)
@@ -1493,7 +1493,6 @@ static struct pcmcia_device_id pcnet_ids[] = {
        PCMCIA_DEVICE_MANF_CARD(0x0149, 0x4530),
        PCMCIA_DEVICE_MANF_CARD(0x0149, 0xc1ab),
        PCMCIA_DEVICE_MANF_CARD(0x0186, 0x0110),
-       PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
        PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x8041),
        PCMCIA_DEVICE_MANF_CARD(0x0213, 0x2452),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0300),
index 39659976a1acfb1589834368aab20080c894d6eb..89294b43c4a901f8ce927e1981ad317b95538f44 100644 (file)
@@ -1285,6 +1285,11 @@ ppp_push(struct ppp *ppp)
 }
 
 #ifdef CONFIG_PPP_MULTILINK
+static bool mp_protocol_compress __read_mostly = true;
+module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(mp_protocol_compress,
+                "compress protocol id in multilink fragments");
+
 /*
  * Divide a packet to be transmitted into fragments and
  * send them out the individual links.
@@ -1347,10 +1352,10 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
        if (nfree == 0 || nfree < navail / 2)
                return 0; /* can't take now, leave it in xmit_pending */
 
-       /* Do protocol field compression (XXX this should be optional) */
+       /* Do protocol field compression */
        p = skb->data;
        len = skb->len;
-       if (*p == 0) {
+       if (*p == 0 && mp_protocol_compress) {
                ++p;
                --len;
        }
index 0a66fed52e8ed48e9a82bfdcfb831957acc491cf..16c62659cdd96040e7831d0e7d7e18bc58832752 100644 (file)
@@ -412,7 +412,7 @@ static  int skfp_driver_init(struct net_device *dev)
                bp->SharedMemAddr = pci_alloc_consistent(&bp->pdev,
                                                         bp->SharedMemSize,
                                                         &bp->SharedMemDMA);
-               if (!bp->SharedMemSize) {
+               if (!bp->SharedMemAddr) {
                        printk("could not allocate mem for ");
                        printk("hardware module: %ld byte\n",
                               bp->SharedMemSize);
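The skfp one-liner fixes a check that could never fire: SharedMemSize is nonzero by construction, so an allocation failure went unnoticed and the driver would later dereference a NULL SharedMemAddr. Reduced to its essence (names illustrative):

    buf = pci_alloc_consistent(pdev, size, &dma_handle);
    if (!buf)               /* test the result of the allocation ... */
            return -ENOMEM;
    /* not: if (!size)         ... never an input known to be nonzero */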
index 4adf124227877e704fe66a7bb74919738bbd46c5..a4f2bd52e546f522995df2fb7fae5721f94e0244 100644 (file)
@@ -148,7 +148,7 @@ static int full_duplex[MAX_UNITS] = {0, };
  * This SUCKS.
  * We need a much better method to determine if dma_addr_t is 64-bit.
  */
-#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || defined(__mips64__) || (defined(__mips__) && defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT))
+#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || (defined(CONFIG_MIPS) && ((defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || defined(CONFIG_64BIT))) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT))
 /* 64-bit dma_addr_t */
 #define ADDR_64BITS    /* This chip uses 64 bit addresses. */
 #define netdrv_addr_t __le64
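As the surrounding "This SUCKS" comment admits, enumerating platforms by hand is fragile. A sketch of a less fragile alternative, assuming the CONFIG_ARCH_DMA_ADDR_T_64BIT Kconfig symbol (merged around this same period) is available in the target tree, so the architecture declares the width of dma_addr_t itself:

    #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
    #define ADDR_64BITS     /* This chip uses 64 bit addresses. */
    #define netdrv_addr_t __le64
    #else
    #define netdrv_addr_t __le32
    #endif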
index 3ed2a67bd6d36c5da4608570bc8d494faa614f59..b409d7ec4ac15ff5e5eb82c2aff17111d48954a3 100644 (file)
@@ -1016,7 +1016,7 @@ static void init_ring(struct net_device *dev)
 
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + 2);
                np->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1407,7 +1407,7 @@ static void refill_rx (struct net_device *dev)
                struct sk_buff *skb;
                entry = np->dirty_rx % RX_RING_SIZE;
                if (np->rx_skbuff[entry] == NULL) {
-                       skb = dev_alloc_skb(np->rx_buf_sz);
+                       skb = dev_alloc_skb(np->rx_buf_sz + 2);
                        np->rx_skbuff[entry] = skb;
                        if (skb == NULL)
                                break;          /* Better luck next round. */
index 8b3dc1eb401541e8e1c3de31ce5308d84f477845..296000bf5a25d9186826da3c3c2f401802409e07 100644 (file)
@@ -324,7 +324,7 @@ static int bdx_fw_load(struct bdx_priv *priv)
        ENTER;
        master = READ_REG(priv, regINIT_SEMAPHORE);
        if (!READ_REG(priv, regINIT_STATUS) && master) {
-               rc = request_firmware(&fw, "tehuti/firmware.bin", &priv->pdev->dev);
+               rc = request_firmware(&fw, "tehuti/bdx.bin", &priv->pdev->dev);
                if (rc)
                        goto out;
                bdx_tx_push_desc_safe(priv, (char *)fw->data, fw->size);
@@ -2510,4 +2510,4 @@ module_exit(bdx_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(BDX_DRV_DESC);
-MODULE_FIRMWARE("tehuti/firmware.bin");
+MODULE_FIRMWARE("tehuti/bdx.bin");
index 30ccbb6d097af220dced34f430723a3b3c5a9dca..6f97b7bbcbf13a29b9f2c567eb317a12ec51609c 100644 (file)
@@ -12658,7 +12658,7 @@ static void __devinit tg3_read_vpd(struct tg3 *tp)
                        cnt = pci_read_vpd(tp->pdev, pos,
                                           TG3_NVM_VPD_LEN - pos,
                                           &vpd_data[pos]);
-                       if (cnt == -ETIMEDOUT || -EINTR)
+                       if (cnt == -ETIMEDOUT || cnt == -EINTR)
                                cnt = 0;
                        else if (cnt < 0)
                                goto out_not_found;
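The tg3 fix corrects a classic precedence slip: the old test parsed as (cnt == -ETIMEDOUT) || (-EINTR), and since -EINTR is a nonzero constant the branch was always taken, zeroing every successful VPD read. A self-contained userspace demonstration (illustrative, not driver code):

    #include <stdio.h>
    #include <errno.h>

    int main(void)
    {
            int cnt = 42;   /* pretend pci_read_vpd() returned 42 bytes */

            /* always true: -EINTR is a nonzero constant */
            if (cnt == -ETIMEDOUT || -EINTR)
                    printf("buggy test fires even on success\n");

            /* true only for the two error codes */
            if (cnt == -ETIMEDOUT || cnt == -EINTR)
                    printf("fixed test fires only on error\n");

            return 0;
    }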
index 5b83c3f35f47c3421a518aaa04fb39224dac2ed3..a3c46f6a15e7c5b02a91542406208928cdb23a6d 100644 (file)
@@ -1004,7 +1004,6 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        }
 
        strcpy(info->driver, KBUILD_MODNAME);
-       strcpy(info->version, UTS_RELEASE);
        strcpy(info->bus_info, pci_name(pci_dev));
 }
 
index aea4645be7f68956472e6f6fbefeadb2e049f060..6140b56cce53d2205638803ce13702bcbfeb8910 100644 (file)
@@ -1507,6 +1507,10 @@ static const struct usb_device_id        products [] = {
        // ASIX AX88178 10/100/1000
        USB_DEVICE (0x0b95, 0x1780),
        .driver_info = (unsigned long) &ax88178_info,
+}, {
+       // Logitec LAN-GTJ/U2A
+       USB_DEVICE (0x0789, 0x0160),
+       .driver_info = (unsigned long) &ax88178_info,
 }, {
        // Linksys USB200M Rev 2
        USB_DEVICE (0x13b1, 0x0018),
index a6281e3987b5c47d6a061e1bb45538000bd6745e..2b791392e788ef893657d04056df673fc3ded672 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * MOSCHIP MCS7830 based USB 2.0 Ethernet Devices
+ * MOSCHIP MCS7830 based (7730/7830/7832) USB 2.0 Ethernet Devices
  *
  * based on usbnet.c, asix.c and the vendor provided mcs7830 driver
  *
@@ -11,6 +11,9 @@
  *
  * Definitions gathered from MOSCHIP, Data Sheet_7830DA.pdf (thanks!).
  *
+ * 2010-12-19: add 7832 USB PID ("functionality same as MCS7830"),
+ *             per active notification by manufacturer
+ *
  * TODO:
  * - support HIF_REG_CONFIG_SLEEPMODE/HIF_REG_CONFIG_TXENABLE (via autopm?)
  * - implement ethtool_ops get_pauseparam/set_pauseparam
@@ -60,6 +63,7 @@
 #define MCS7830_MAX_MCAST      64
 
 #define MCS7830_VENDOR_ID      0x9710
+#define MCS7832_PRODUCT_ID     0x7832
 #define MCS7830_PRODUCT_ID     0x7830
 #define MCS7730_PRODUCT_ID     0x7730
 
@@ -351,7 +355,7 @@ static int mcs7830_set_autoneg(struct usbnet *dev, int ptrUserPhyMode)
        if (!ret)
                ret = mcs7830_write_phy(dev, MII_BMCR,
                                BMCR_ANENABLE | BMCR_ANRESTART  );
-       return ret < 0 ? : 0;
+       return ret;
 }
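The hunk above also removes a misuse of GCC's omitted-middle-operand conditional: "ret < 0 ? : 0" evaluates to the condition's own value when it is true, so a negative errno collapsed to 1 while callers expected 0 or a negative code. Reduced to a self-contained demonstration (builds with GCC; names illustrative):

    #include <stdio.h>

    int main(void)
    {
            int ret = -5;   /* pretend the PHY write failed */

            /* GNU "a ? : b" yields the condition's value when true:
             * (ret < 0) is 1, so the real error code is discarded */
            printf("buggy: %d\n", ret < 0 ? : 0);

            /* the fix simply propagates ret (0 or negative errno) */
            printf("fixed: %d\n", ret);

            return 0;
    }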
 
 
@@ -626,7 +630,7 @@ static int mcs7830_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static const struct driver_info moschip_info = {
-       .description    = "MOSCHIP 7830/7730 usb-NET adapter",
+       .description    = "MOSCHIP 7830/7832/7730 usb-NET adapter",
        .bind           = mcs7830_bind,
        .rx_fixup       = mcs7830_rx_fixup,
        .flags          = FLAG_ETHER,
@@ -644,6 +648,10 @@ static const struct driver_info sitecom_info = {
 };
 
 static const struct usb_device_id products[] = {
+       {
+               USB_DEVICE(MCS7830_VENDOR_ID, MCS7832_PRODUCT_ID),
+               .driver_info = (unsigned long) &moschip_info,
+       },
        {
                USB_DEVICE(MCS7830_VENDOR_ID, MCS7830_PRODUCT_ID),
                .driver_info = (unsigned long) &moschip_info,
index 0bbc0c3231358de20f395d9fd34b05279e0e13fd..cc83fa71c3ffb970b5107aa812e1e1956c9c7c82 100644 (file)
@@ -166,7 +166,9 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        if (!(rcv->flags & IFF_UP))
                goto tx_drop;
 
-       if (dev->features & NETIF_F_NO_CSUM)
+       /* don't change ip_summed == CHECKSUM_PARTIAL, as that
+          will cause bad checksum on forwarded packets */
+       if (skb->ip_summed == CHECKSUM_NONE)
                skb->ip_summed = rcv_priv->ip_summed;
 
        length = skb->len + ETH_HLEN;
index 25a2722c8a986ce35a8a37a9076814095cc70ee3..1d9aed6457234fe0cc18c34fe198dc6ca4e4d62b 100644 (file)
@@ -891,7 +891,6 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 
        SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
 
-       netif_stop_queue(dev);
 }
 
 static int hostap_enable_hostapd(local_info_t *local, int rtnl_locked)
index db540910b1104d42b5d20d15e542c3fc5da7649c..0e027f787fbce6452b0c78fefc24dbd30cb75100 100644 (file)
@@ -315,6 +315,7 @@ struct iwl_cfg iwl100_bgn_cfg = {
        .mod_params = &iwlagn_mod_params,
        .base_params = &iwl1000_base_params,
        .ht_params = &iwl1000_ht_params,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl100_bg_cfg = {
@@ -330,6 +331,7 @@ struct iwl_cfg iwl100_bg_cfg = {
        .ops = &iwl1000_ops,
        .mod_params = &iwlagn_mod_params,
        .base_params = &iwl1000_base_params,
+       .use_new_eeprom_reading = true,
 };
 
 MODULE_FIRMWARE(IWL1000_MODULE_FIRMWARE(IWL1000_UCODE_API_MAX));
index 11e6532fc573d1ed93d5ec4aaf886342d0fe8073..0ceeaac85eda1ab8a8dd8a046044b47dc5e5df63 100644 (file)
@@ -561,6 +561,7 @@ struct iwl_cfg iwl6000g2a_2agn_cfg = {
        .ht_params = &iwl6000_ht_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2a_2abg_cfg = {
@@ -578,6 +579,7 @@ struct iwl_cfg iwl6000g2a_2abg_cfg = {
        .base_params = &iwl6000_base_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2a_2bg_cfg = {
@@ -595,6 +597,7 @@ struct iwl_cfg iwl6000g2a_2bg_cfg = {
        .base_params = &iwl6000_base_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2agn_cfg = {
@@ -616,6 +619,7 @@ struct iwl_cfg iwl6000g2b_2agn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2abg_cfg = {
@@ -636,6 +640,7 @@ struct iwl_cfg iwl6000g2b_2abg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2bgn_cfg = {
@@ -657,6 +662,7 @@ struct iwl_cfg iwl6000g2b_2bgn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2bg_cfg = {
@@ -677,6 +683,7 @@ struct iwl_cfg iwl6000g2b_2bg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_bgn_cfg = {
@@ -698,6 +705,7 @@ struct iwl_cfg iwl6000g2b_bgn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_bg_cfg = {
@@ -718,6 +726,7 @@ struct iwl_cfg iwl6000g2b_bg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 /*
@@ -804,6 +813,7 @@ struct iwl_cfg iwl6050g2_bgn_cfg = {
        .base_params = &iwl6050_base_params,
        .ht_params = &iwl6000_ht_params,
        .need_dc_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6050_2abg_cfg = {
@@ -857,6 +867,7 @@ struct iwl_cfg iwl130_bgn_cfg = {
        .need_dc_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl130_bg_cfg = {
@@ -876,6 +887,7 @@ struct iwl_cfg iwl130_bg_cfg = {
        .need_dc_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 MODULE_FIRMWARE(IWL6000_MODULE_FIRMWARE(IWL6000_UCODE_API_MAX));
index a650baba0809d1bf1d56d86f687f7dc07cf53d1b..9eeeda18748de6fb12a09cc5f5b74c90553a4bf4 100644 (file)
@@ -392,7 +392,7 @@ static s8 iwl_update_channel_txpower(struct iwl_priv *priv,
 /**
  * iwlcore_eeprom_enhanced_txpower: process enhanced tx power info
  */
-void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
+static void iwlcore_eeprom_enhanced_txpower_old(struct iwl_priv *priv)
 {
        int eeprom_section_count = 0;
        int section, element;
@@ -419,7 +419,8 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
                 * always check for valid entry before process
                 * the information
                 */
-               if (!enhanced_txpower->common || enhanced_txpower->reserved)
+               if (!(enhanced_txpower->flags || enhanced_txpower->channel) ||
+                   enhanced_txpower->delta_20_in_40)
                        continue;
 
                for (element = 0; element < eeprom_section_count; element++) {
@@ -452,3 +453,86 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
                }
        }
 }
+
+static void
+iwlcore_eeprom_enh_txp_read_element(struct iwl_priv *priv,
+                                   struct iwl_eeprom_enhanced_txpwr *txp,
+                                   s8 max_txpower_avg)
+{
+       int ch_idx;
+       bool is_ht40 = txp->flags & IWL_EEPROM_ENH_TXP_FL_40MHZ;
+       enum ieee80211_band band;
+
+       band = txp->flags & IWL_EEPROM_ENH_TXP_FL_BAND_52G ?
+               IEEE80211_BAND_5GHZ : IEEE80211_BAND_2GHZ;
+
+       for (ch_idx = 0; ch_idx < priv->channel_count; ch_idx++) {
+               struct iwl_channel_info *ch_info = &priv->channel_info[ch_idx];
+
+               /* update matching channel, or all channels for common (channel 0) data */
+               if (txp->channel != 0 && ch_info->channel != txp->channel)
+                       continue;
+
+               /* update matching band only */
+               if (band != ch_info->band)
+                       continue;
+
+               if (ch_info->max_power_avg < max_txpower_avg && !is_ht40) {
+                       ch_info->max_power_avg = max_txpower_avg;
+                       ch_info->curr_txpow = max_txpower_avg;
+                       ch_info->scan_power = max_txpower_avg;
+               }
+
+               if (is_ht40 && ch_info->ht40_max_power_avg < max_txpower_avg)
+                       ch_info->ht40_max_power_avg = max_txpower_avg;
+       }
+}
+
+#define EEPROM_TXP_OFFS        (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT)
+#define EEPROM_TXP_ENTRY_LEN sizeof(struct iwl_eeprom_enhanced_txpwr)
+#define EEPROM_TXP_SZ_OFFS (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT_SIZE)
+
+static void iwlcore_eeprom_enhanced_txpower_new(struct iwl_priv *priv)
+{
+       struct iwl_eeprom_enhanced_txpwr *txp_array, *txp;
+       int idx, entries;
+       __le16 *txp_len;
+       s8 max_txp_avg, max_txp_avg_halfdbm;
+
+       BUILD_BUG_ON(sizeof(struct iwl_eeprom_enhanced_txpwr) != 8);
+
+       /* the length is in 16-bit words, but we want entries */
+       txp_len = (__le16 *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_SZ_OFFS);
+       entries = le16_to_cpup(txp_len) * 2 / EEPROM_TXP_ENTRY_LEN;
+
+       txp_array = (void *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_OFFS);
+       for (idx = 0; idx < entries; idx++) {
+               txp = &txp_array[idx];
+
+               /* skip invalid entries */
+               if (!(txp->flags & IWL_EEPROM_ENH_TXP_FL_VALID))
+                       continue;
+
+               max_txp_avg = iwl_get_max_txpower_avg(priv, txp_array, idx,
+                                                     &max_txp_avg_halfdbm);
+
+               /*
+                * Update the user limit values to the highest
+                * power supported by any channel
+                */
+               if (max_txp_avg > priv->tx_power_user_lmt)
+                       priv->tx_power_user_lmt = max_txp_avg;
+               if (max_txp_avg_halfdbm > priv->tx_power_lmt_in_half_dbm)
+                       priv->tx_power_lmt_in_half_dbm = max_txp_avg_halfdbm;
+
+               iwlcore_eeprom_enh_txp_read_element(priv, txp, max_txp_avg);
+       }
+}
+
+void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
+{
+       if (priv->cfg->use_new_eeprom_reading)
+               iwlcore_eeprom_enhanced_txpower_new(priv);
+       else
+               iwlcore_eeprom_enhanced_txpower_old(priv);
+}
index b555edd533547e3bf98eed8e948a276ef141aace..554afb7d9670dff564873b9509cd59c3058a346e 100644 (file)
@@ -569,6 +569,12 @@ static u32 eeprom_indirect_address(const struct iwl_priv *priv, u32 address)
        case INDIRECT_REGULATORY:
                offset = iwl_eeprom_query16(priv, EEPROM_LINK_REGULATORY);
                break;
+       case INDIRECT_TXP_LIMIT:
+               offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT);
+               break;
+       case INDIRECT_TXP_LIMIT_SIZE:
+               offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT_SIZE);
+               break;
        case INDIRECT_CALIBRATION:
                offset = iwl_eeprom_query16(priv, EEPROM_LINK_CALIBRATION);
                break;
index 64527def059f7de76ac0e4bdcadd0147576805c1..954ecc2c34c41f239dc1d24932d67be68aa741fa 100644 (file)
@@ -390,6 +390,7 @@ struct iwl_cfg {
        const bool need_temp_offset_calib; /* if used set to true */
        u8 scan_rx_antennas[IEEE80211_NUM_BANDS];
        u8 scan_tx_antennas[IEEE80211_NUM_BANDS];
+       const bool use_new_eeprom_reading; /* temporary, remove later */
 };
 
 /***************************
index d9b590625ae4eb16ba996d0bbd8d794acc30dae9..e3a279d2d0b6307fcf5d38948454cb7ba8da29c7 100644 (file)
@@ -120,6 +120,17 @@ struct iwl_eeprom_channel {
        s8 max_power_avg;       /* max power (dBm) on this chnl, limit 31 */
 } __packed;
 
+enum iwl_eeprom_enhanced_txpwr_flags {
+       IWL_EEPROM_ENH_TXP_FL_VALID             = BIT(0),
+       IWL_EEPROM_ENH_TXP_FL_BAND_52G          = BIT(1),
+       IWL_EEPROM_ENH_TXP_FL_OFDM              = BIT(2),
+       IWL_EEPROM_ENH_TXP_FL_40MHZ             = BIT(3),
+       IWL_EEPROM_ENH_TXP_FL_HT_AP             = BIT(4),
+       IWL_EEPROM_ENH_TXP_FL_RES1              = BIT(5),
+       IWL_EEPROM_ENH_TXP_FL_RES2              = BIT(6),
+       IWL_EEPROM_ENH_TXP_FL_COMMON_TYPE       = BIT(7),
+};
+
 /**
  * iwl_eeprom_enhanced_txpwr structure
  *    This structure presents the enhanced regulatory tx power limit layout
@@ -127,21 +138,23 @@ struct iwl_eeprom_channel {
  *    Enhanced regulatory tx power portion of eeprom image can be broken down
 *    into individual structures; each one is 8 bytes in size and contains the
  *    following information
- * @common: (desc + channel) not used by driver, should _NOT_ be "zero"
+ * @flags: entry flags
+ * @channel: channel number
  * @chain_a_max_pwr: chain a max power in 1/2 dBm
  * @chain_b_max_pwr: chain b max power in 1/2 dBm
  * @chain_c_max_pwr: chain c max power in 1/2 dBm
- * @reserved: not used, should be "zero"
+ * @delta_20_in_40: 20-in-40 deltas (hi/lo)
  * @mimo2_max_pwr: mimo2 max power in 1/2 dBm
  * @mimo3_max_pwr: mimo3 max power in 1/2 dBm
  *
  */
 struct iwl_eeprom_enhanced_txpwr {
-       __le16 common;
+       u8 flags;
+       u8 channel;
        s8 chain_a_max;
        s8 chain_b_max;
        s8 chain_c_max;
-       s8 reserved;
+       u8 delta_20_in_40;
        s8 mimo2_max;
        s8 mimo3_max;
 } __packed;
@@ -186,6 +199,8 @@ struct iwl_eeprom_enhanced_txpwr {
 #define EEPROM_LINK_CALIBRATION      (2*0x67)
 #define EEPROM_LINK_PROCESS_ADJST    (2*0x68)
 #define EEPROM_LINK_OTHERS           (2*0x69)
+#define EEPROM_LINK_TXP_LIMIT        (2*0x6a)
+#define EEPROM_LINK_TXP_LIMIT_SIZE   (2*0x6b)
 
 /* agn regulatory - indirect access */
 #define EEPROM_REG_BAND_1_CHANNELS       ((0x08)\
@@ -389,6 +404,8 @@ struct iwl_eeprom_calib_info {
 #define INDIRECT_CALIBRATION        0x00040000
 #define INDIRECT_PROCESS_ADJST      0x00050000
 #define INDIRECT_OTHERS             0x00060000
+#define INDIRECT_TXP_LIMIT          0x00070000
+#define INDIRECT_TXP_LIMIT_SIZE     0x00080000
 #define INDIRECT_ADDRESS            0x00100000
 
 /* General */
index 373930afc26b50f2d7cddeb9027b07f8e5ad7775..113f4f204657ec911b6fe01c112f83f57d0d7fa8 100644 (file)
@@ -619,7 +619,7 @@ static int lbs_ret_scan(struct lbs_private *priv, unsigned long dummy,
                                     print_ssid(ssid_buf, ssid, ssid_len),
                                     LBS_SCAN_RSSI_TO_MBM(rssi)/100);
 
-                       if (channel ||
+                       if (channel &&
                            !(channel->flags & IEEE80211_CHAN_DISABLED))
                                cfg80211_inform_bss(wiphy, channel,
                                        bssid, le64_to_cpu(*(__le64 *)tsfdesc),
index d5bc21e5a02c7520d6599ad5822eafe85b267823..2325e56a9b0bd8cc35c7ee958be19ed5ff2adf2f 100644 (file)
@@ -43,6 +43,7 @@ MODULE_FIRMWARE("isl3887usb");
 
 static struct usb_device_id p54u_table[] __devinitdata = {
        /* Version 1 devices (pci chip + net2280) */
+       {USB_DEVICE(0x0411, 0x0050)},   /* Buffalo WLI2-USB2-G54 */
        {USB_DEVICE(0x045e, 0x00c2)},   /* Microsoft MN-710 */
        {USB_DEVICE(0x0506, 0x0a11)},   /* 3COM 3CRWE254G72 */
        {USB_DEVICE(0x06b9, 0x0120)},   /* Thomson SpeedTouch 120g */
@@ -56,9 +57,13 @@ static struct usb_device_id p54u_table[] __devinitdata = {
        {USB_DEVICE(0x0846, 0x4220)},   /* Netgear WG111 */
        {USB_DEVICE(0x09aa, 0x1000)},   /* Spinnaker Proto board */
        {USB_DEVICE(0x0cde, 0x0006)},   /* Medion 40900, Roper Europe */
+       {USB_DEVICE(0x0db0, 0x6826)},   /* MSI UB54G (MS-6826) */
        {USB_DEVICE(0x107b, 0x55f2)},   /* Gateway WGU-210 (Gemtek) */
        {USB_DEVICE(0x124a, 0x4023)},   /* Shuttle PN15, Airvast WM168g, IOGear GWU513 */
+       {USB_DEVICE(0x1435, 0x0210)},   /* Inventel UR054G */
+       {USB_DEVICE(0x15a9, 0x0002)},   /* Gemtek WUBI-100GW 802.11g */
        {USB_DEVICE(0x1630, 0x0005)},   /* 2Wire 802.11g USB (v1) / Z-Com */
+       {USB_DEVICE(0x182d, 0x096b)},   /* Sitecom WL-107 */
        {USB_DEVICE(0x1915, 0x2234)},   /* Linksys WUSB54G OEM */
        {USB_DEVICE(0x1915, 0x2235)},   /* Linksys WUSB54G Portable OEM */
        {USB_DEVICE(0x2001, 0x3701)},   /* DLink DWL-G120 Spinnaker */
@@ -94,6 +99,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
        {USB_DEVICE(0x1435, 0x0427)},   /* Inventel UR054G */
        {USB_DEVICE(0x1668, 0x1050)},   /* Actiontec 802UIG-1 */
        {USB_DEVICE(0x2001, 0x3704)},   /* DLink DWL-G122 rev A2 */
+       {USB_DEVICE(0x2001, 0x3705)},   /* D-Link DWL-G120 rev C1 */
        {USB_DEVICE(0x413c, 0x5513)},   /* Dell WLA3310 USB Wireless Adapter */
        {USB_DEVICE(0x413c, 0x8102)},   /* Spinnaker DUT */
        {USB_DEVICE(0x413c, 0x8104)},   /* Cohiba Proto board */
index b267395359863ca10a55fe8a39f186e5bf08deb9..09a67905c230e5f9a9029ba2c786a707d73b5108 100644 (file)
@@ -912,6 +912,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev)
        __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags);
        __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags);
        __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags);
+       __set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags);
        if (!modparam_nohwcrypt)
                __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags);
        __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags);
index 94fe589acfaabff06eac097fe7e3ae3e2f7ea310..ab43e7ca2a231000ba0d40489860f950883be16d 100644 (file)
@@ -664,6 +664,7 @@ enum rt2x00_flags {
        DRIVER_REQUIRE_COPY_IV,
        DRIVER_REQUIRE_L2PAD,
        DRIVER_REQUIRE_TXSTATUS_FIFO,
+       DRIVER_REQUIRE_TASKLET_CONTEXT,
 
        /*
         * Driver features
index 5ba79b935f09f5bed56f999653338501584cf141..d019830ca8407c2aa10ed0ae95abe8679c654e55 100644 (file)
@@ -390,9 +390,12 @@ void rt2x00lib_txdone(struct queue_entry *entry,
         * through a mac80211 library call (RTS/CTS) then we should not
         * send the status report back.
         */
-       if (!(skbdesc_flags & SKBDESC_NOT_MAC80211))
-               ieee80211_tx_status(rt2x00dev->hw, entry->skb);
-       else
+       if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) {
+               if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags))
+                       ieee80211_tx_status(rt2x00dev->hw, entry->skb);
+               else
+                       ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb);
+       } else
                dev_kfree_skb_any(entry->skb);
 
        /*
index cd1b3dcd61db67671e28df47cae7a19293fa881f..ec47e22fa186ce89ed7c2758998e6ff008d140aa 100644 (file)
@@ -744,7 +744,7 @@ static int yellowfin_init_ring(struct net_device *dev)
        }
 
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
                yp->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1157,7 +1157,7 @@ static int yellowfin_rx(struct net_device *dev)
        for (; yp->cur_rx - yp->dirty_rx > 0; yp->dirty_rx++) {
                entry = yp->dirty_rx % RX_RING_SIZE;
                if (yp->rx_skbuff[entry] == NULL) {
-                       struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz);
+                       struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
                        if (skb == NULL)
                                break;                          /* Better luck next round. */
                        yp->rx_skbuff[entry] = skb;
index c85d3c7421fc94796c6814d0c453b33f831a9279..f37fbeb66a4400c2c7fa28bc45cd8be5af7c7c47 100644 (file)
@@ -61,7 +61,7 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
                info.of_node = of_node_get(node);
                info.archdata = &dev_ad;
 
-               request_module("%s", info.type);
+               request_module("%s%s", I2C_MODULE_PREFIX, info.type);
 
                result = i2c_new_device(adap, &info);
                if (result == NULL) {
index 2574700db461559717a95e0623c1c58c5821c687..5f7226223a62c9067bcc4a47f1b3e6bd47b69c40 100644 (file)
@@ -115,7 +115,8 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
 static int __init select_detection_mode(void)
 {
        struct dummy_slot *slot, *tmp;
-       pcie_port_service_register(&dummy_driver);
+       if (pcie_port_service_register(&dummy_driver))
+               return PCIEHP_DETECT_ACPI;
        pcie_port_service_unregister(&dummy_driver);
        list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
                list_del(&slot->list);
index c44a5e8b8b82da9d06706d9cd3a3ec0fcb2b883c..f0b3ad13c273fb73394cc78ca8fb36521a8a4c90 100644 (file)
@@ -75,6 +75,7 @@
 #include <drm/i915_drm.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
+#include "intel_ips.h"
 
 #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
 
 #define thm_writel(off, val) writel((val), ips->regmap + (off))
 
 static const int IPS_ADJUST_PERIOD = 5000; /* ms */
+static bool late_i915_load = false;
 
 /* For initial average collection */
 static const int IPS_SAMPLE_PERIOD = 200; /* ms */
@@ -339,6 +341,9 @@ struct ips_driver {
        u64 orig_turbo_ratios;
 };
 
+static bool
+ips_gpu_turbo_enabled(struct ips_driver *ips);
+
 /**
  * ips_cpu_busy - is CPU busy?
  * @ips: IPS driver struct
@@ -517,7 +522,7 @@ static void ips_disable_cpu_turbo(struct ips_driver *ips)
  */
 static bool ips_gpu_busy(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return false;
 
        return ips->gpu_busy();
@@ -532,7 +537,7 @@ static bool ips_gpu_busy(struct ips_driver *ips)
  */
 static void ips_gpu_raise(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return;
 
        if (!ips->gpu_raise())
@@ -549,7 +554,7 @@ static void ips_gpu_raise(struct ips_driver *ips)
  */
 static void ips_gpu_lower(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return;
 
        if (!ips->gpu_lower())
@@ -1454,6 +1459,31 @@ out_err:
        return false;
 }
 
+static bool
+ips_gpu_turbo_enabled(struct ips_driver *ips)
+{
+       if (!ips->gpu_busy && late_i915_load) {
+               if (ips_get_i915_syms(ips)) {
+                       dev_info(&ips->dev->dev,
+                                "i915 driver attached, reenabling gpu turbo\n");
+                       ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
+               }
+       }
+
+       return ips->gpu_turbo_enabled;
+}
+
+void
+ips_link_to_i915_driver(void)
+{
+       /* We can't cleanly get at the various ips_driver structs from
+        * this caller (the i915 driver), so just set a flag saying
+        * that it's time to try getting the symbols again.
+        */
+       late_i915_load = true;
+}
+EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
+
 static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
                     PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
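
The two additions above solve a load-order problem: if intel_ips probes before i915, its initial symbol lookup fails and GPU turbo would stay off forever; i915 now pokes ips_link_to_i915_driver() once it is up, and the next ips_gpu_turbo_enabled() poll retries the lookup. A reduced sketch of that late-binding idiom; i915_read_mch_val is a symbol i915 is known to export, but treat the surrounding names as illustrative (a full driver would also symbol_put() on unload):

#include <linux/module.h>

extern unsigned long i915_read_mch_val(void);	/* exported by i915 */

static unsigned long (*read_mch_val)(void);
static bool late_i915_load;

/* Called by the dependent module once it has finished loading. */
void example_link_to_i915(void)
{
	late_i915_load = true;
}
EXPORT_SYMBOL_GPL(example_link_to_i915);

static bool example_gpu_syms_ready(void)
{
	if (!read_mch_val && late_i915_load)
		read_mch_val = symbol_get(i915_read_mch_val);
	return read_mch_val != NULL;
}
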
diff --git a/drivers/platform/x86/intel_ips.h b/drivers/platform/x86/intel_ips.h
new file mode 100644 (file)
index 0000000..73299be
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+void ips_link_to_i915_driver(void);
index 41a9e34899ac5f81da6dde61f6aa2a1d34e2e134..ca35b0ce944a58ca017ff5b5678b73a468d7a42a 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/sfi.h>
 #include <asm/mrst.h>
 #include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>
 
 /* IPC defines the following message types */
 #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id)
                iounmap(ipcdev.ipc_base);
                return -ENOMEM;
        }
+
+       intel_scu_devices_create();
+
        return 0;
 }
 
@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev)
        iounmap(ipcdev.ipc_base);
        iounmap(ipcdev.i2c_base);
        ipcdev.pdev = NULL;
+       intel_scu_devices_destroy();
 }
 
 static const struct pci_device_id pci_ids[] = {
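
The ordering here is deliberate: intel_scu_devices_create() runs only after the IPC register windows are mapped, and intel_scu_devices_destroy() runs before they are unmapped, so SCU-attached platform devices never see a half-initialized or half-torn-down IPC channel. Reduced to a sketch (probe/remove bodies are placeholders; the two intel_scu_* calls are the ones declared in asm/mrst.h):

#include <asm/mrst.h>	/* intel_scu_devices_create()/destroy() */

/* Sketch of the create/destroy ordering added above. */
static int example_probe(void)
{
	/* 1. map ipc_base/i2c_base, request the IRQ, ... */
	intel_scu_devices_create();	/* 2. children come up last */
	return 0;
}

static void example_remove(void)
{
	intel_scu_devices_destroy();	/* 1. children go down first */
	/* 2. ... then unmap the IPC windows */
}
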
index 2883428d5ac806408b5082221036ffa30915356c..4941cade319f5cef06d508d0b1f1354d951c1034 100644 (file)
@@ -463,6 +463,18 @@ config RTC_DRV_CMOS
          This driver can also be built as a module. If so, the module
          will be called rtc-cmos.
 
+config RTC_DRV_VRTC
+       tristate "Virtual RTC for Moorestown platforms"
+       depends on X86_MRST
+       default y if X86_MRST
+       help
+         Say "yes" here to get direct support for the real time clock
+         found on Moorestown platforms. The VRTC is an emulated RTC that
+         derives its clock source from a real RTC in the PMIC. The
+         MC146818-style programming interface is mostly preserved, but
+         any updates are done via IPC calls to the system controller FW.
+
 config RTC_DRV_DS1216
        tristate "Dallas DS1216"
        depends on SNI_RM
index 4c2832df4697d3cdba4af02990c683dc13118776..2afdaf3ff98660f53c72a189503786ebeac0a27c 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_RTC_DRV_CMOS)    += rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)        += rtc-coh901331.o
 obj-$(CONFIG_RTC_DRV_DAVINCI)  += rtc-davinci.o
 obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o
+obj-$(CONFIG_RTC_DRV_VRTC)     += rtc-mrst.o
 obj-$(CONFIG_RTC_DRV_DS1216)   += rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1286)   += rtc-ds1286.o
 obj-$(CONFIG_RTC_DRV_DS1302)   += rtc-ds1302.o
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
new file mode 100644 (file)
index 0000000..bcd0cf6
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * rtc-mrst.c: Driver for Moorestown virtual RTC
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *        Feng Tang (feng.tang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * The VRTC is emulated by the system controller firmware; the real
+ * HW RTC is located in the PMIC device. The SCU FW shadows the PMIC
+ * RTC in a memory-mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based upon drivers/rtc/rtc-cmos.c
+ */
+
+/*
+ * Note:
+ *  * The vRTC only supports binary mode and 24H mode.
+ *  * The vRTC only supports PIE and AIE, not UIE; its PIE fires only
+ *    at 23:59:59 every day, with no support for an adjustable frequency.
+ *  * The alarm function is limited to hour/minute/second fields.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+
+#include <asm-generic/rtc.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+
+struct mrst_rtc {
+       struct rtc_device       *rtc;
+       struct device           *dev;
+       int                     irq;
+       struct resource         *iomem;
+
+       u8                      enabled_wake;
+       u8                      suspend_ctrl;
+};
+
+static const char driver_name[] = "rtc_mrst";
+
+#define        RTC_IRQMASK     (RTC_PF | RTC_AF)
+
+static inline int is_intr(u8 rtc_intr)
+{
+       if (!(rtc_intr & RTC_IRQF))
+               return 0;
+       return rtc_intr & RTC_IRQMASK;
+}
+
+/*
+ * rtc_time's year holds the offset from 1900, but the vRTC's YEAR
+ * register can't be programmed to a value larger than 0x64, so this
+ * driver uses 1960 (1970 is the UNIX epoch) as the base and does the
+ * translation at read/write time.
+ *
+ * Why not just use 1970 as the base? Because 1960 keeps the leap-year
+ * handling consistent between the vRTC and the underlying physical
+ * RTC device.
+ */
+static int mrst_read_time(struct device *dev, struct rtc_time *time)
+{
+       unsigned long flags;
+
+       if (rtc_is_updating())
+               mdelay(20);
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       time->tm_sec = vrtc_cmos_read(RTC_SECONDS);
+       time->tm_min = vrtc_cmos_read(RTC_MINUTES);
+       time->tm_hour = vrtc_cmos_read(RTC_HOURS);
+       time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+       time->tm_mon = vrtc_cmos_read(RTC_MONTH);
+       time->tm_year = vrtc_cmos_read(RTC_YEAR);
+       spin_unlock_irqrestore(&rtc_lock, flags);
+
+       /* Adjust from the vRTC's 1960 base to rtc_time's 1900 base */
+       time->tm_year += 60;
+       time->tm_mon--;
+       return RTC_24H;
+}
+
+static int mrst_set_time(struct device *dev, struct rtc_time *time)
+{
+       int ret;
+       unsigned long flags;
+       unsigned char mon, day, hrs, min, sec;
+       unsigned int yrs;
+
+       yrs = time->tm_year;
+       mon = time->tm_mon + 1;   /* tm_mon starts at zero */
+       day = time->tm_mday;
+       hrs = time->tm_hour;
+       min = time->tm_min;
+       sec = time->tm_sec;
+
+       if (yrs < 70 || yrs > 138)
+               return -EINVAL;
+       yrs -= 60;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+
+       vrtc_cmos_write(yrs, RTC_YEAR);
+       vrtc_cmos_write(mon, RTC_MONTH);
+       vrtc_cmos_write(day, RTC_DAY_OF_MONTH);
+       vrtc_cmos_write(hrs, RTC_HOURS);
+       vrtc_cmos_write(min, RTC_MINUTES);
+       vrtc_cmos_write(sec, RTC_SECONDS);
+
+       spin_unlock_irqrestore(&rtc_lock, flags);
+
+       ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME);
+       return ret;
+}
+
+static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char rtc_control;
+
+       if (mrst->irq <= 0)
+               return -EIO;
+
+       /* Basic alarms only support the hour, minute, and second fields.
+        * Some also support day and month, for alarms up to a year in
+        * the future.
+        */
+       t->time.tm_mday = -1;
+       t->time.tm_mon = -1;
+       t->time.tm_year = -1;
+
+       /* vRTC only supports binary mode */
+       spin_lock_irq(&rtc_lock);
+       t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM);
+       t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM);
+       t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM);
+
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       spin_unlock_irq(&rtc_lock);
+
+       t->enabled = !!(rtc_control & RTC_AIE);
+       t->pending = 0;
+
+       return 0;
+}
+
+static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control)
+{
+       unsigned char   rtc_intr;
+
+       /*
+        * NOTE: after changing RTC_xIE bits we always read INTR_FLAGS;
+        * allegedly some older RTCs need that to handle IRQs properly.
+        */
+       rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS);
+       rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+       if (is_intr(rtc_intr))
+               rtc_update_irq(mrst->rtc, 1, rtc_intr);
+}
+
+static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask)
+{
+       unsigned char   rtc_control;
+
+       /*
+        * Flush any pending IRQ status, notably for update irqs,
+        * before we enable new IRQs
+        */
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       mrst_checkintr(mrst, rtc_control);
+
+       rtc_control |= mask;
+       vrtc_cmos_write(rtc_control, RTC_CONTROL);
+
+       mrst_checkintr(mrst, rtc_control);
+}
+
+static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask)
+{
+       unsigned char   rtc_control;
+
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       rtc_control &= ~mask;
+       vrtc_cmos_write(rtc_control, RTC_CONTROL);
+       mrst_checkintr(mrst, rtc_control);
+}
+
+static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char hrs, min, sec;
+       int ret = 0;
+
+       if (!mrst->irq)
+               return -EIO;
+
+       hrs = t->time.tm_hour;
+       min = t->time.tm_min;
+       sec = t->time.tm_sec;
+
+       spin_lock_irq(&rtc_lock);
+       /* Next rtc irq must not be from previous alarm setting */
+       mrst_irq_disable(mrst, RTC_AIE);
+
+       /* Update alarm */
+       vrtc_cmos_write(hrs, RTC_HOURS_ALARM);
+       vrtc_cmos_write(min, RTC_MINUTES_ALARM);
+       vrtc_cmos_write(sec, RTC_SECONDS_ALARM);
+
+       spin_unlock_irq(&rtc_lock);
+
+       ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM);
+       if (ret)
+               return ret;
+
+       spin_lock_irq(&rtc_lock);
+       if (t->enabled)
+               mrst_irq_enable(mrst, RTC_AIE);
+
+       spin_unlock_irq(&rtc_lock);
+
+       return 0;
+}
+
+static int mrst_irq_set_state(struct device *dev, int enabled)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned long   flags;
+
+       if (!mrst->irq)
+               return -ENXIO;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+
+       if (enabled)
+               mrst_irq_enable(mrst, RTC_PIE);
+       else
+               mrst_irq_disable(mrst, RTC_PIE);
+
+       spin_unlock_irqrestore(&rtc_lock, flags);
+       return 0;
+}
+
+#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
+
+/* Currently, the vRTC doesn't support UIE ON/OFF */
+static int
+mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned long   flags;
+
+       switch (cmd) {
+       case RTC_AIE_OFF:
+       case RTC_AIE_ON:
+               if (!mrst->irq)
+                       return -EINVAL;
+               break;
+       default:
+               /* PIE ON/OFF is handled by mrst_irq_set_state() */
+               return -ENOIOCTLCMD;
+       }
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       switch (cmd) {
+       case RTC_AIE_OFF:       /* alarm off */
+               mrst_irq_disable(mrst, RTC_AIE);
+               break;
+       case RTC_AIE_ON:        /* alarm on */
+               mrst_irq_enable(mrst, RTC_AIE);
+               break;
+       }
+       spin_unlock_irqrestore(&rtc_lock, flags);
+       return 0;
+}
+
+#else
+#define        mrst_rtc_ioctl  NULL
+#endif
+
+#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
+
+static int mrst_procfs(struct device *dev, struct seq_file *seq)
+{
+       unsigned char   rtc_control, valid;
+
+       spin_lock_irq(&rtc_lock);
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       valid = vrtc_cmos_read(RTC_VALID);
+       spin_unlock_irq(&rtc_lock);
+
+       return seq_printf(seq,
+                       "periodic_IRQ\t: %s\n"
+                       "alarm\t\t: %s\n"
+                       "BCD\t\t: no\n"
+                       "periodic_freq\t: daily (not adjustable)\n",
+                       (rtc_control & RTC_PIE) ? "on" : "off",
+                       (rtc_control & RTC_AIE) ? "on" : "off");
+}
+
+#else
+#define        mrst_procfs     NULL
+#endif
+
+static const struct rtc_class_ops mrst_rtc_ops = {
+       .ioctl          = mrst_rtc_ioctl,
+       .read_time      = mrst_read_time,
+       .set_time       = mrst_set_time,
+       .read_alarm     = mrst_read_alarm,
+       .set_alarm      = mrst_set_alarm,
+       .proc           = mrst_procfs,
+       .irq_set_state  = mrst_irq_set_state,
+};
+
+static struct mrst_rtc mrst_rtc;
+
+/*
+ * When the vRTC IRQ is captured by the SCU FW, the FW clears the AIE
+ * bit in Reg B, so there is no need for this driver to clear it.
+ */
+static irqreturn_t mrst_rtc_irq(int irq, void *p)
+{
+       u8 irqstat;
+
+       spin_lock(&rtc_lock);
+       /* This read will clear all IRQ flags inside Reg C */
+       irqstat = vrtc_cmos_read(RTC_INTR_FLAGS);
+       spin_unlock(&rtc_lock);
+
+       irqstat &= RTC_IRQMASK | RTC_IRQF;
+       if (is_intr(irqstat)) {
+               rtc_update_irq(p, 1, irqstat);
+               return IRQ_HANDLED;
+       }
+       return IRQ_NONE;
+}
+
+static int __init
+vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq)
+{
+       int retval = 0;
+       unsigned char rtc_control;
+
+       /* There can be only one ... */
+       if (mrst_rtc.dev)
+               return -EBUSY;
+
+       if (!iomem)
+               return -ENODEV;
+
+       iomem = request_mem_region(iomem->start,
+                       iomem->end + 1 - iomem->start,
+                       driver_name);
+       if (!iomem) {
+               dev_dbg(dev, "i/o mem already in use.\n");
+               return -EBUSY;
+       }
+
+       mrst_rtc.irq = rtc_irq;
+       mrst_rtc.iomem = iomem;
+
+       mrst_rtc.rtc = rtc_device_register(driver_name, dev,
+                               &mrst_rtc_ops, THIS_MODULE);
+       if (IS_ERR(mrst_rtc.rtc)) {
+               retval = PTR_ERR(mrst_rtc.rtc);
+               goto cleanup0;
+       }
+
+       mrst_rtc.dev = dev;
+       dev_set_drvdata(dev, &mrst_rtc);
+       rename_region(iomem, dev_name(&mrst_rtc.rtc->dev));
+
+       spin_lock_irq(&rtc_lock);
+       mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE);
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       spin_unlock_irq(&rtc_lock);
+
+       if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY)))
+               dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n");
+
+       if (rtc_irq) {
+               retval = request_irq(rtc_irq, mrst_rtc_irq,
+                               IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev),
+                               mrst_rtc.rtc);
+               if (retval < 0) {
+                       dev_dbg(dev, "IRQ %d is already in use, err %d\n",
+                               rtc_irq, retval);
+                       goto cleanup1;
+               }
+       }
+       dev_dbg(dev, "initialised\n");
+       return 0;
+
+cleanup1:
+       mrst_rtc.dev = NULL;
+       rtc_device_unregister(mrst_rtc.rtc);
+cleanup0:
+       release_region(iomem->start, iomem->end + 1 - iomem->start);
+       dev_err(dev, "rtc-mrst: unable to initialise\n");
+       return retval;
+}
+
+static void rtc_mrst_do_shutdown(void)
+{
+       spin_lock_irq(&rtc_lock);
+       mrst_irq_disable(&mrst_rtc, RTC_IRQMASK);
+       spin_unlock_irq(&rtc_lock);
+}
+
+static void __exit rtc_mrst_do_remove(struct device *dev)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       struct resource *iomem;
+
+       rtc_mrst_do_shutdown();
+
+       if (mrst->irq)
+               free_irq(mrst->irq, mrst->rtc);
+
+       rtc_device_unregister(mrst->rtc);
+       mrst->rtc = NULL;
+
+       iomem = mrst->iomem;
+       release_region(iomem->start, iomem->end + 1 - iomem->start);
+       mrst->iomem = NULL;
+
+       mrst->dev = NULL;
+       dev_set_drvdata(dev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int mrst_suspend(struct device *dev, pm_message_t mesg)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char   tmp;
+
+       /* Only the alarm might be a wakeup event source */
+       spin_lock_irq(&rtc_lock);
+       mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL);
+       if (tmp & (RTC_PIE | RTC_AIE)) {
+               unsigned char   mask;
+
+               if (device_may_wakeup(dev))
+                       mask = RTC_IRQMASK & ~RTC_AIE;
+               else
+                       mask = RTC_IRQMASK;
+               tmp &= ~mask;
+               vrtc_cmos_write(tmp, RTC_CONTROL);
+
+               mrst_checkintr(mrst, tmp);
+       }
+       spin_unlock_irq(&rtc_lock);
+
+       if (tmp & RTC_AIE) {
+               mrst->enabled_wake = 1;
+               enable_irq_wake(mrst->irq);
+       }
+
+       dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n",
+                       (tmp & RTC_AIE) ? ", alarm may wake" : "",
+                       tmp);
+
+       return 0;
+}
+
+/*
+ * We want RTC alarms to wake us from the deep power saving state
+ */
+static inline int mrst_poweroff(struct device *dev)
+{
+       return mrst_suspend(dev, PMSG_HIBERNATE);
+}
+
+static int mrst_resume(struct device *dev)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char tmp = mrst->suspend_ctrl;
+
+       /* Re-enable any irqs previously active */
+       if (tmp & RTC_IRQMASK) {
+               unsigned char   mask;
+
+               if (mrst->enabled_wake) {
+                       disable_irq_wake(mrst->irq);
+                       mrst->enabled_wake = 0;
+               }
+
+               spin_lock_irq(&rtc_lock);
+               do {
+                       vrtc_cmos_write(tmp, RTC_CONTROL);
+
+                       mask = vrtc_cmos_read(RTC_INTR_FLAGS);
+                       mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+                       if (!is_intr(mask))
+                               break;
+
+                       rtc_update_irq(mrst->rtc, 1, mask);
+                       tmp &= ~RTC_AIE;
+               } while (mask & RTC_AIE);
+               spin_unlock_irq(&rtc_lock);
+       }
+
+       dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp);
+
+       return 0;
+}
+
+#else
+#define        mrst_suspend    NULL
+#define        mrst_resume     NULL
+
+static inline int mrst_poweroff(struct device *dev)
+{
+       return -ENOSYS;
+}
+
+#endif
+
+static int __init vrtc_mrst_platform_probe(struct platform_device *pdev)
+{
+       return vrtc_mrst_do_probe(&pdev->dev,
+                       platform_get_resource(pdev, IORESOURCE_MEM, 0),
+                       platform_get_irq(pdev, 0));
+}
+
+static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev)
+{
+       rtc_mrst_do_remove(&pdev->dev);
+       return 0;
+}
+
+static void vrtc_mrst_platform_shutdown(struct platform_device *pdev)
+{
+       if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev))
+               return;
+
+       rtc_mrst_do_shutdown();
+}
+
+MODULE_ALIAS("platform:vrtc_mrst");
+
+static struct platform_driver vrtc_mrst_platform_driver = {
+       .probe          = vrtc_mrst_platform_probe,
+       .remove         = __exit_p(vrtc_mrst_platform_remove),
+       .shutdown       = vrtc_mrst_platform_shutdown,
+       .driver = {
+               .name           = (char *) driver_name,
+               .suspend        = mrst_suspend,
+               .resume         = mrst_resume,
+       }
+};
+
+static int __init vrtc_mrst_init(void)
+{
+       return platform_driver_register(&vrtc_mrst_platform_driver);
+}
+
+static void __exit vrtc_mrst_exit(void)
+{
+       platform_driver_unregister(&vrtc_mrst_platform_driver);
+}
+
+module_init(vrtc_mrst_init);
+module_exit(vrtc_mrst_exit);
+
+MODULE_AUTHOR("Jacob Pan; Feng Tang");
+MODULE_DESCRIPTION("Driver for Moorestown virtual RTC");
+MODULE_LICENSE("GPL");
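
A standalone illustration of the 1960 base described above mrst_read_time(): tm_year counts from 1900, the vRTC YEAR register from 1960, and the write path must keep the register value within its 0x64 limit. This compiles as plain user-space C:

#include <assert.h>

int main(void)
{
	int tm_year = 111;		/* struct rtc_time: 2011 - 1900 */
	int vrtc_year = tm_year - 60;	/* value written to RTC_YEAR: 51 */

	assert(vrtc_year <= 0x64);	   /* fits the register's limit */
	assert(vrtc_year + 60 == tm_year); /* read path restores it */
	return 0;
}
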
index 90cf0a6ff23e3cce363958cb0c3e81cde08c14af..dd14e202c2c8cbaafe4ed261cab01835a16f8f89 100644 (file)
@@ -207,7 +207,7 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
        struct rs5c372  *rs5c = i2c_get_clientdata(client);
-       unsigned char   buf[8];
+       unsigned char   buf[7];
        int             addr;
 
        dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d "
index c94502dfac664f55fd42080695989d9e1cb1697b..045d7e87b6323e36213d57ee6615c6362f53bab7 100644 (file)
@@ -677,7 +677,7 @@ bfa_fcs_fabric_sm_isolated(struct bfa_fcs_fabric_s *fabric,
        bfa_trc(fabric->fcs, event);
        wwn2str(pwwn_ptr, fabric->bport.port_cfg.pwwn);
 
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Port is isolated due to VF_ID mismatch. "
                "PWWN: %s Port VF_ID: %04x switch port VF_ID: %04x.",
                pwwn_ptr, fabric->fcs->port_vfid,
@@ -1411,7 +1411,7 @@ bfa_fcs_fabric_set_fabric_name(struct bfa_fcs_fabric_s *fabric,
                wwn2str(pwwn_ptr, bfa_fcs_lport_get_pwwn(&fabric->bport));
                wwn2str(fwwn_ptr,
                        bfa_fcs_lport_get_fabric_name(&fabric->bport));
-               BFA_LOG(KERN_WARNING, bfad, log_level,
+               BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
                        "Base port WWN = %s Fabric WWN = %s\n",
                        pwwn_ptr, fwwn_ptr);
        }
index 9662bcdeb41d64c50ab7530fd34ab17d543fc44c..413b58eef93a08088cc7957d63fc9c815d58027c 100644 (file)
@@ -261,7 +261,7 @@ bfa_fcs_itnim_sm_hcb_online(struct bfa_fcs_itnim_s *itnim,
                bfa_fcb_itnim_online(itnim->itnim_drv);
                wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
                wwn2str(rpwwn_buf, itnim->rport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Target (WWN = %s) is online for initiator (WWN = %s)\n",
                rpwwn_buf, lpwwn_buf);
                break;
@@ -301,11 +301,11 @@ bfa_fcs_itnim_sm_online(struct bfa_fcs_itnim_s *itnim,
                wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
                wwn2str(rpwwn_buf, itnim->rport->pwwn);
                if (bfa_fcs_lport_is_online(itnim->rport->port) == BFA_TRUE)
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "Target (WWN = %s) connectivity lost for "
                        "initiator (WWN = %s)\n", rpwwn_buf, lpwwn_buf);
                else
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Target (WWN = %s) offlined by initiator (WWN = %s)\n",
                        rpwwn_buf, lpwwn_buf);
                break;
index 377cbfff6f2ec88a467b6e8014d27c91d3b4a0c3..8d651309302b1f745ef842869ce782d8c9f490b6 100644 (file)
@@ -491,7 +491,7 @@ bfa_fcs_lport_online_actions(struct bfa_fcs_lport_s *port)
        __port_action[port->fabric->fab_type].online(port);
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port online: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -512,11 +512,11 @@ bfa_fcs_lport_offline_actions(struct bfa_fcs_lport_s *port)
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
        if (bfa_fcs_fabric_is_online(port->fabric) == BFA_TRUE)
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                "Logical port lost fabric connectivity: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
        else
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port taken offline: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -573,7 +573,7 @@ bfa_fcs_lport_deleted(struct bfa_fcs_lport_s *port)
        char    lpwwn_buf[BFA_STRING_32];
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port deleted: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -878,7 +878,7 @@ bfa_fcs_lport_init(struct bfa_fcs_lport_s *lport,
                                        vport ? vport->vport_drv : NULL);
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(lport));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "New logical port created: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
index 47f35c0ef29a0c69f5d6842a6db0d173a0195e84..cf4a6e73e60d7dfa5bcaf9b7d94ea986d7389a57 100644 (file)
@@ -2056,7 +2056,7 @@ bfa_fcs_rport_online_action(struct bfa_fcs_rport_s *rport)
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
        wwn2str(rpwwn_buf, rport->pwwn);
        if (!BFA_FCS_PID_IS_WKA(rport->pid))
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Remote port (WWN = %s) online for logical port (WWN = %s)\n",
                rpwwn_buf, lpwwn_buf);
 }
@@ -2075,12 +2075,12 @@ bfa_fcs_rport_offline_action(struct bfa_fcs_rport_s *rport)
        wwn2str(rpwwn_buf, rport->pwwn);
        if (!BFA_FCS_PID_IS_WKA(rport->pid)) {
                if (bfa_fcs_lport_is_online(rport->port) == BFA_TRUE)
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Remote port (WWN = %s) connectivity lost for "
                                "logical port (WWN = %s)\n",
                                rpwwn_buf, lpwwn_buf);
                else
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Remote port (WWN = %s) offlined by "
                                "logical port (WWN = %s)\n",
                                rpwwn_buf, lpwwn_buf);
index 54475b53a5ab1494490f2561fdc0c9d7884c0a55..9f4aa391ea9deca5508a0ae90060572ec8a53889 100644 (file)
@@ -402,7 +402,7 @@ bfa_ioc_sm_op_entry(struct bfa_ioc_s *ioc)
 
        ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK);
        bfa_ioc_hb_monitor(ioc);
-       BFA_LOG(KERN_INFO, bfad, log_level, "IOC enabled\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC enabled\n");
 }
 
 static void
@@ -444,7 +444,7 @@ bfa_ioc_sm_disabling_entry(struct bfa_ioc_s *ioc)
 {
        struct bfad_s *bfad = (struct bfad_s *)ioc->bfa->bfad;
        bfa_iocpf_disable(ioc);
-       BFA_LOG(KERN_INFO, bfad, log_level, "IOC disabled\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC disabled\n");
 }
 
 /*
@@ -565,7 +565,7 @@ bfa_ioc_sm_fail_entry(struct bfa_ioc_s *ioc)
                notify->cbfn(notify->cbarg);
        }
 
-       BFA_LOG(KERN_CRIT, bfad, log_level,
+       BFA_LOG(KERN_CRIT, bfad, bfa_log_level,
                "Heart Beat of IOC has failed\n");
 }
 
@@ -1812,7 +1812,7 @@ bfa_ioc_pf_fwmismatch(struct bfa_ioc_s *ioc)
         * Provide enable completion callback.
         */
        ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE);
-       BFA_LOG(KERN_WARNING, bfad, log_level,
+       BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
                "Running firmware version is incompatible "
                "with the driver version\n");
 }
index c768143f4805a7cb5f0094682a39c30b73ea433d..37e16ac8f249a33a4045506df74c360d3c5eb698 100644 (file)
@@ -2138,7 +2138,7 @@ bfa_fcport_sm_enabling_qwait(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2198,7 +2198,7 @@ bfa_fcport_sm_enabling(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2251,7 +2251,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
 
                bfa_fcport_scn(fcport, BFA_PORT_LINKUP, BFA_FALSE);
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port online: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2277,7 +2277,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2322,9 +2322,9 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port offline: WWN = %s\n", pwwn_buf);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2336,10 +2336,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                                BFA_PL_EID_PORT_ST_CHANGE, 0, "Port Linkdown");
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2349,10 +2349,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_fcport_reset_linkinfo(fcport);
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2363,10 +2363,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_fcport_scn(fcport, BFA_PORT_LINKDOWN, BFA_FALSE);
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2497,7 +2497,7 @@ bfa_fcport_sm_disabling(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port enabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2551,7 +2551,7 @@ bfa_fcport_sm_disabled(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port enabled: WWN = %s\n", pwwn_buf);
                break;
 
index 1f938974b84876978600b40d97c265ddd7c2c7c0..6797720213b233bf444937445be983630e60d50d 100644 (file)
@@ -50,7 +50,7 @@ int           reqq_size, rspq_size, num_sgpgs;
 int            rport_del_timeout = BFA_FCS_RPORT_DEF_DEL_TIMEOUT;
 int            bfa_lun_queue_depth = BFAD_LUN_QUEUE_DEPTH;
 int            bfa_io_max_sge = BFAD_IO_MAX_SGE;
-int            log_level = 3; /* WARNING log level */
+int            bfa_log_level = 3; /* WARNING log level */
 int            ioc_auto_recover = BFA_TRUE;
 int            bfa_linkup_delay = -1;
 int            fdmi_enable = BFA_TRUE;
@@ -108,8 +108,8 @@ module_param(bfa_lun_queue_depth, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_lun_queue_depth, "Lun queue depth, default=32, Range[>0]");
 module_param(bfa_io_max_sge, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_io_max_sge, "Max io scatter/gather elements, default=255");
-module_param(log_level, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(log_level, "Driver log level, default=3, "
+module_param(bfa_log_level, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(bfa_log_level, "Driver log level, default=3, "
                                "Range[Critical:1|Error:2|Warning:3|Info:4]");
 module_param(ioc_auto_recover, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ioc_auto_recover, "IOC auto recovery, default=1, "
@@ -1112,7 +1112,7 @@ bfad_start_ops(struct bfad_s *bfad) {
        } else
                bfad_os_rport_online_wait(bfad);
 
-       BFA_LOG(KERN_INFO, bfad, log_level, "bfa device claimed\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "bfa device claimed\n");
 
        return BFA_STATUS_OK;
 }
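
The same rename runs through every bfa hunk above. A global simply named log_level is too generic for a driver that can be linked into one kernel image next to everything else, and since the symbol is shared across the driver's objects (see the extern declaration below) it cannot just be made static, so it gains a driver prefix instead. Note the user-visible side effect: the module parameter is now set as modprobe bfa bfa_log_level=4 (or bfa.bfa_log_level=4 on the kernel command line for a built-in driver).
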
index 97f9b6c0937e75cf3a16762092ac46ac6f6cb890..d5ce2349ac59fa5c13c6d3b89d3ec2927561b84c 100644 (file)
@@ -337,7 +337,7 @@ extern int  num_sgpgs;
 extern int      rport_del_timeout;
 extern int      bfa_lun_queue_depth;
 extern int      bfa_io_max_sge;
-extern int      log_level;
+extern int      bfa_log_level;
 extern int      ioc_auto_recover;
 extern int      bfa_linkup_delay;
 extern int      msix_disable_cb;
index 8ca967dee66d4eac1b551c82ff5416454e0ffcb6..fbad5e9b240218bb349ac4c5217e328e785106c3 100644 (file)
@@ -225,7 +225,8 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
        }
 
        bfa_trc(bfad, hal_io->iotag);
-       BFA_LOG(KERN_INFO, bfad, log_level, "scsi%d: abort cmnd %p iotag %x\n",
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
+               "scsi%d: abort cmnd %p iotag %x\n",
                im_port->shost->host_no, cmnd, hal_io->iotag);
        (void) bfa_ioim_abort(hal_io);
        spin_unlock_irqrestore(&bfad->bfad_lock, flags);
@@ -241,7 +242,7 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
 
        cmnd->scsi_done(cmnd);
        bfa_trc(bfad, hal_io->iotag);
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "scsi%d: complete abort 0x%p iotag 0x%x\n",
                im_port->shost->host_no, cmnd, hal_io->iotag);
        return SUCCESS;
@@ -260,7 +261,7 @@ bfad_im_target_reset_send(struct bfad_s *bfad, struct scsi_cmnd *cmnd,
 
        tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
        if (!tskim) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "target reset, fail to allocate tskim\n");
                rc = BFA_STATUS_FAILED;
                goto out;
@@ -311,7 +312,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
        tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
        if (!tskim) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "LUN reset, fail to allocate tskim");
                spin_unlock_irqrestore(&bfad->bfad_lock, flags);
                rc = FAILED;
@@ -336,7 +337,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
        task_status = cmnd->SCp.Status >> 1;
        if (task_status != BFI_TSKIM_STS_OK) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "LUN reset failure, status: %d\n", task_status);
                rc = FAILED;
        }
@@ -380,7 +381,7 @@ bfad_im_reset_bus_handler(struct scsi_cmnd *cmnd)
 
                        task_status = cmnd->SCp.Status >> 1;
                        if (task_status != BFI_TSKIM_STS_OK) {
-                               BFA_LOG(KERN_ERR, bfad, log_level,
+                               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                        "target reset failure,"
                                        " status: %d\n", task_status);
                                err_cnt++;
@@ -460,7 +461,7 @@ bfa_fcb_itnim_free(struct bfad_s *bfad, struct bfad_itnim_s *itnim_drv)
        fcid = bfa_fcs_itnim_get_fcid(&itnim_drv->fcs_itnim);
        wwn2str(wwpn_str, wwpn);
        fcid2str(fcid_str, fcid);
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "ITNIM FREE scsi%d: FCID: %s WWPN: %s\n",
                port->im_port->shost->host_no,
                fcid_str, wwpn_str);
@@ -589,7 +590,7 @@ void
 bfad_im_scsi_host_free(struct bfad_s *bfad, struct bfad_im_port_s *im_port)
 {
        bfa_trc(bfad, bfad->inst_no);
-       BFA_LOG(KERN_INFO, bfad, log_level, "Free scsi%d\n",
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "Free scsi%d\n",
                        im_port->shost->host_no);
 
        fc_remove_host(im_port->shost);
@@ -1048,7 +1049,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
                        fcid2str(fcid_str, fcid);
                        list_add_tail(&itnim->list_entry,
                                &im_port->itnim_mapped_list);
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "ITNIM ONLINE Target: %d:0:%d "
                                "FCID: %s WWPN: %s\n",
                                im_port->shost->host_no,
@@ -1081,7 +1082,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
                        wwn2str(wwpn_str, wwpn);
                        fcid2str(fcid_str, fcid);
                        list_del(&itnim->list_entry);
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "ITNIM OFFLINE Target: %d:0:%d "
                                "FCID: %s WWPN: %s\n",
                                im_port->shost->host_no,
index e5e9e6735f7d62e3d585505de7be4fe379fbbd40..9739431092d126aefdef78ec03888c66b40b256d 100644 (file)
@@ -198,6 +198,7 @@ int __init register_intc_controller(struct intc_desc *desc)
        list_add_tail(&d->list, &intc_list);
 
        raw_spin_lock_init(&d->lock);
+       INIT_RADIX_TREE(&d->tree, GFP_ATOMIC);
 
        d->index = nr_intc_controllers;
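
The missing piece being added: a radix tree embedded in a dynamically allocated structure must be initialized with INIT_RADIX_TREE() before the first insert or lookup. Zeroed memory is not equivalent, because the root also records the GFP mask used for its internal node allocations. A sketch under that assumption (struct and function names are made up):

#include <linux/radix-tree.h>
#include <linux/slab.h>

struct example_ctrl {
	struct radix_tree_root tree;
};

static struct example_ctrl *example_alloc_ctrl(void)
{
	struct example_ctrl *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	/* kzalloc() zeroing does not set the GFP mask the tree
	 * will use for node allocation; this does. */
	INIT_RADIX_TREE(&d->tree, GFP_ATOMIC);
	return d;
}
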
 
index 052b3c7fa6a0f644d26c613e67e69ffe00ad05be..8856bcca9d2933db4a8f98526bc716c6640bb075 100644 (file)
@@ -317,7 +317,7 @@ static void mcfqspi_work(struct work_struct *work)
                msg = container_of(mcfqspi->msgq.next, struct spi_message,
                                   queue);
 
-               list_del_init(&mcfqspi->msgq);
+               list_del_init(&msg->queue);
                spin_unlock_irqrestore(&mcfqspi->lock, flags);
 
                spi = msg->spi;
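
The one-liner above fixes a classic list mistake: list_del_init() was applied to the queue head (mcfqspi->msgq) rather than to the entry just dequeued (msg->queue), which re-initializes the head and strands every other queued message. The intended pattern, sketched:

#include <linux/list.h>
#include <linux/spi/spi.h>

/*
 * Sketch: pop the first message off a driver's queue. Deleting
 * msg->queue unlinks one entry; list_del_init() on the head itself
 * would have emptied the whole list in one stroke.
 */
static struct spi_message *example_pop_msg(struct list_head *msgq)
{
	struct spi_message *msg;

	if (list_empty(msgq))
		return NULL;
	msg = list_first_entry(msgq, struct spi_message, queue);
	list_del_init(&msg->queue);
	return msg;
}
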
index ec9f0b1bf86494da73bd8d14cc69fe5927db8065..84439f655601f71577483acd423aed7622fae807 100644 (file)
@@ -563,7 +563,7 @@ static struct of_platform_driver mpc52xx_spi_of_driver = {
                .of_match_table = mpc52xx_spi_match,
        },
        .probe = mpc52xx_spi_probe,
-       .remove = __exit_p(mpc52xx_spi_remove),
+       .remove = __devexit_p(mpc52xx_spi_remove),
 };
 
 static int __init mpc52xx_spi_init(void)
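
__exit_p() discards the function whenever the driver is built in, but a bound device can still be unbound at runtime (sysfs unbind), so the remove path has to survive in built-in kernels too; __devexit/__devexit_p() only drop it when CONFIG_HOTPLUG is off. The matching annotations, sketched with hypothetical names on a generic platform driver:

#include <linux/init.h>
#include <linux/platform_device.h>

static int __devinit example_spi_probe(struct platform_device *pdev)
{
	return 0;	/* placeholder */
}

/* Kept for built-in kernels (unbind is still possible); discarded
 * only when CONFIG_HOTPLUG=n. */
static int __devexit example_spi_remove(struct platform_device *pdev)
{
	return 0;	/* placeholder */
}

static struct platform_driver example_spi_driver = {
	.probe	= example_spi_probe,
	.remove	= __devexit_p(example_spi_remove),
	.driver	= { .name = "example-spi" },
};
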
index 2a651e61bfbff30f23e8e44dddfcb70ab8a05173..951a160fc27fbe2614376776ba3b712646e305ff 100644 (file)
@@ -1305,10 +1305,49 @@ static int __exit omap2_mcspi_remove(struct platform_device *pdev)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:omap2_mcspi");
 
+#ifdef CONFIG_SUSPEND
+/*
+ * When the SPI controller wakes up from off-mode, CS is in the active
+ * state. If it was in the inactive state when the driver was suspended,
+ * force it back to the inactive state at wake-up.
+ */
+static int omap2_mcspi_resume(struct device *dev)
+{
+       struct spi_master       *master = dev_get_drvdata(dev);
+       struct omap2_mcspi      *mcspi = spi_master_get_devdata(master);
+       struct omap2_mcspi_cs *cs;
+
+       omap2_mcspi_enable_clocks(mcspi);
+       list_for_each_entry(cs, &omap2_mcspi_ctx[master->bus_num - 1].cs,
+                           node) {
+               if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
+
+                       /*
+                        * We need to toggle the CS state for the OMAP
+                        * to take this change into account.
+                        */
+                       MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 1);
+                       __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+                       MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 0);
+                       __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+               }
+       }
+       omap2_mcspi_disable_clocks(mcspi);
+       return 0;
+}
+#else
+#define        omap2_mcspi_resume      NULL
+#endif
+
+static const struct dev_pm_ops omap2_mcspi_pm_ops = {
+       .resume = omap2_mcspi_resume,
+};
+
 static struct platform_driver omap2_mcspi_driver = {
        .driver = {
                .name =         "omap2_mcspi",
                .owner =        THIS_MODULE,
+               .pm =           &omap2_mcspi_pm_ops
        },
        .remove =       __exit_p(omap2_mcspi_remove),
 };
index 709c836607de23636e6c567a38833cceff752b08..b02d0cbce89049e003fe8300421adeed847e92ce 100644 (file)
@@ -584,8 +584,7 @@ void spi_unregister_master(struct spi_master *master)
        list_del(&master->list);
        mutex_unlock(&board_lock);
 
-       dummy = device_for_each_child(master->dev.parent, &master->dev,
-                                       __unregister);
+       dummy = device_for_each_child(&master->dev, NULL, __unregister);
        device_unregister(&master->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_master);
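
device_for_each_child(parent, data, fn) walks the children of the device it is given; the old call walked the children of the master's parent, so the spi_device children hanging off the master itself were never unregistered. The callback the corrected call pairs with looks roughly like this (sketch):

#include <linux/device.h>
#include <linux/spi/spi.h>

/* Sketch: invoked once per child of &master->dev, and every such
 * child is an spi_device. */
static int __unregister(struct device *dev, void *unused)
{
	spi_unregister_device(to_spi_device(dev));
	return 0;
}
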
index e3b4f645196603e517e22ef782205a02fb224b22..a99e2333b949efdbc10e391253ae952137238b7d 100644 (file)
@@ -258,18 +258,18 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
        return mpc8xxx_spi->count;
 }
 
-static void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
+static inline void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
 {
-       if (cmd[1] && cmd[2] && cmd[3]) {
+       if (cmd) {
                cmd[1] = (u8)(addr >> 16);
                cmd[2] = (u8)(addr >> 8);
                cmd[3] = (u8)(addr >> 0);
        }
 }
 
-static unsigned int fsl_espi_cmd2addr(u8 *cmd)
+static inline unsigned int fsl_espi_cmd2addr(u8 *cmd)
 {
-       if (cmd[1] && cmd[2] && cmd[3])
+       if (cmd)
                return cmd[1] << 16 | cmd[2] << 8 | cmd[3] << 0;
 
        return 0;
@@ -395,9 +395,11 @@ static void fsl_espi_rw_trans(struct spi_message *m,
                        }
                }
 
-               addr = fsl_espi_cmd2addr(local_buf);
-               addr += pos;
-               fsl_espi_addr2cmd(addr, local_buf);
+               if (pos > 0) {
+                       addr = fsl_espi_cmd2addr(local_buf);
+                       addr += pos;
+                       fsl_espi_addr2cmd(addr, local_buf);
+               }
 
                espi_trans->n_tx = n_tx;
                espi_trans->n_rx = trans_len;
@@ -507,16 +509,29 @@ void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 
        /* We need handle RX first */
        if (events & SPIE_NE) {
-               u32 rx_data;
+               u32 rx_data, tmp;
+               u8 rx_data_8;
 
                /* Spin until RX is done */
                while (SPIE_RXCNT(events) < min(4, mspi->len)) {
                        cpu_relax();
                        events = mpc8xxx_spi_read_reg(&reg_base->event);
                }
-               mspi->len -= 4;
 
-               rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+               if (mspi->len >= 4) {
+                       rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+               } else {
+                       tmp = mspi->len;
+                       rx_data = 0;
+                       while (tmp--) {
+                               rx_data_8 = in_8((u8 *)&reg_base->receive);
+                               rx_data |= (rx_data_8 << (tmp * 8));
+                       }
+
+                       rx_data <<= (4 - mspi->len) * 8;
+               }
+
+               mspi->len -= 4;
 
                if (mspi->rx)
                        mspi->get_rx(rx_data, mspi);
index 8c3c057aa8478fb2a436edfd571843513544ca71..d0e9e0207539e2c9491fc6c1de25c75dda4bd9c6 100644 (file)
@@ -435,12 +435,6 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio)
        int ret = 0;
        struct zram *zram = queue->queuedata;
 
-       if (unlikely(!zram->init_done)) {
-               set_bit(BIO_UPTODATE, &bio->bi_flags);
-               bio_endio(bio, 0);
-               return 0;
-       }
-
        if (!valid_io_request(zram, bio)) {
                zram_stat64_inc(zram, &zram->stats.invalid_io);
                bio_io_error(bio);
index 44447f54942f6d6e1c7c7e2a777b4c882f6b546d..99ac70e32556f841b83c909108767dedf444237e 100644 (file)
@@ -2206,8 +2206,11 @@ static int uea_boot(struct uea_softc *sc)
                goto err1;
        }
 
-       sc->kthread = kthread_run(uea_kthread, sc, "ueagle-atm");
-       if (sc->kthread == ERR_PTR(-ENOMEM)) {
+       /* Create the worker thread, but don't start it here; it is
+        * woken only after all generic usbatm initialization is done.
+        */
+       sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm");
+       if (IS_ERR(sc->kthread)) {
                uea_err(INS_TO_USBDEV(sc), "failed to create thread\n");
                goto err2;
        }
@@ -2624,6 +2627,7 @@ static struct usbatm_driver uea_usbatm_driver = {
 static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
        struct usb_device *usb = interface_to_usbdev(intf);
+       int ret;
 
        uea_enters(usb);
        uea_info(usb, "ADSL device founded vid (%#X) pid (%#X) Rev (%#X): %s\n",
@@ -2637,7 +2641,19 @@ static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
        if (UEA_IS_PREFIRM(id))
                return uea_load_firmware(usb, UEA_CHIP_VERSION(id));
 
-       return usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+       ret = usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+       if (ret == 0) {
+               struct usbatm_data *usbatm = usb_get_intfdata(intf);
+               struct uea_softc *sc = usbatm->driver_data;
+
+               /* Ensure carrier is initialized to off as early as possible */
+               UPDATE_ATM_SIGNAL(ATM_PHY_SIG_LOST);
+
+               /* Only start the worker thread when all init is done */
+               wake_up_process(sc->kthread);
+       }
+
+       return ret;
 }
 
 static void uea_disconnect(struct usb_interface *intf)
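
Two fixes share this hunk: IS_ERR() replaces the comparison against ERR_PTR(-ENOMEM), which missed every other error pointer kthread_create() can return, and splitting kthread_run() into kthread_create() plus the later wake_up_process() in uea_probe() above keeps the thread from racing ahead of usbatm initialization. The idiom, sketched with hypothetical names:

#include <linux/kthread.h>
#include <linux/err.h>

/* Sketch: create the thread early, wake it only when shared state
 * is ready; IS_ERR() catches any error pointer, not just -ENOMEM. */
static int example_start_worker(int (*fn)(void *), void *data,
				struct task_struct **out)
{
	struct task_struct *t = kthread_create(fn, data, "example-worker");

	if (IS_ERR(t))
		return PTR_ERR(t);
	*out = t;
	/* ... finish initializing everything fn() will touch ... */
	wake_up_process(t);	/* kthread_run() == create + wake */
	return 0;
}
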
index a4f4546f0be08555f8a2d793b6b94beaae91186f..397d15eb1ea8aee3d4fe870ae87f8b14f161654f 100644 (file)
@@ -242,6 +242,7 @@ static int cr_backlight_remove(struct platform_device *pdev)
        backlight_device_unregister(crp->cr_backlight_device);
        lcd_device_unregister(crp->cr_lcd_device);
        pci_dev_put(lpc_dev);
+       kfree(crp);
 
        return 0;
 }
index 0e6aa3d96a4246f7c05b0274a2295a3275d6d2ac..4ac1201ad6c2fb3229a60e1377f29b4eef78a2eb 100644 (file)
@@ -1458,7 +1458,7 @@ static bool apertures_overlap(struct aperture *gen, struct aperture *hw)
        if (gen->base == hw->base)
                return true;
        /* is the generic aperture base inside the hw base->hw base+size */
-       if (gen->base > hw->base && gen->base <= hw->base + hw->size)
+       if (gen->base > hw->base && gen->base < hw->base + hw->size)
                return true;
        return false;
 }
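
The change from <= to < turns the test into a proper half-open interval: an aperture whose base sits exactly at hw->base + hw->size begins just past the hardware range and does not overlap it. The invariant as a standalone helper (plain C):

#include <stdbool.h>
#include <stdint.h>

/* A resource [base, base + size) is half-open: base + size is the
 * first address *outside* the range. */
static bool addr_in_range(uint64_t x, uint64_t base, uint64_t size)
{
	return x >= base && x < base + size;
}
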
index 5c363d026f64c2856b44e748ecb149d9242be6b6..1ab2c25886757616c4faa324620ed7536fac9350 100644 (file)
 #define LCDC_SIZE      0x04
 #define SIZE_XMAX(x)   ((((x) >> 4) & 0x3f) << 20)
 
-#ifdef CONFIG_ARCH_MX1
-#define SIZE_YMAX(y)   ((y) & 0x1ff)
-#else
-#define SIZE_YMAX(y)   ((y) & 0x3ff)
-#endif
+#define YMAX_MASK       (cpu_is_mx1() ? 0x1ff : 0x3ff)
+#define SIZE_YMAX(y)   ((y) & YMAX_MASK)
 
 #define LCDC_VPW       0x08
 #define VPW_VPW(x)     ((x) & 0x3ff)
@@ -623,7 +620,7 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
        if (var->right_margin > 255)
                printk(KERN_ERR "%s: invalid right_margin %d\n",
                        info->fix.id, var->right_margin);
-       if (var->yres < 1 || var->yres > 511)
+       if (var->yres < 1 || var->yres > YMAX_MASK)
                printk(KERN_ERR "%s: invalid yres %d\n",
                        info->fix.id, var->yres);
        if (var->vsync_len > 100)
index d7df10315d8d6f51aba676328985a52128e81876..fcda0e97011384bd7ee55eaeeed5377f328cf016 100644 (file)
@@ -787,6 +787,9 @@ static int sh_hdmi_read_edid(struct sh_hdmi *hdmi)
                found_rate_error = rate_error;
        }
 
+       hdmi->var.width = hdmi->monspec.max_x * 10;
+       hdmi->var.height = hdmi->monspec.max_y * 10;
+
        /*
         * TODO 1: if no ->info is present, postpone running the config until
         * after ->info first gets registered.
@@ -960,8 +963,12 @@ static bool sh_hdmi_must_reconfigure(struct sh_hdmi *hdmi)
        dev_dbg(info->dev, "Old %ux%u, new %ux%u\n",
                mode1.xres, mode1.yres, mode2.xres, mode2.yres);
 
-       if (fb_mode_is_equal(&mode1, &mode2))
+       if (fb_mode_is_equal(&mode1, &mode2)) {
+               /* It can be a different monitor with an equal video-mode */
+               old_var->width = new_var->width;
+               old_var->height = new_var->height;
                return false;
+       }
 
        dev_dbg(info->dev, "Switching %u -> %u lines\n",
                mode1.yres, mode2.yres);
@@ -1057,8 +1064,11 @@ static void sh_hdmi_edid_work_fn(struct work_struct *work)
                         * on, if we run a resume here, the logo disappears
                         */
                        if (lock_fb_info(hdmi->info)) {
-                               sh_hdmi_display_on(hdmi, hdmi->info);
-                               unlock_fb_info(hdmi->info);
+                               struct fb_info *info = hdmi->info;
+                               info->var.width = hdmi->var.width;
+                               info->var.height = hdmi->var.height;
+                               sh_hdmi_display_on(hdmi, info);
+                               unlock_fb_info(info);
                        }
                } else {
                        /* New monitor or have to wake up */
index b02d97a879d67256992a3941536fcd7e88626f63..c05326b61235e724e18ea288d88aa66138751899 100644 (file)
@@ -54,8 +54,8 @@ static int lcdc_shared_regs[] = {
 };
 #define NR_SHARED_REGS ARRAY_SIZE(lcdc_shared_regs)
 
-#define DEFAULT_XRES 1280
-#define DEFAULT_YRES 1024
+#define MAX_XRES 1920
+#define MAX_YRES 1080
 
 static unsigned long lcdc_offs_mainlcd[NR_CH_REGS] = {
        [LDDCKPAT1R] = 0x400,
@@ -914,22 +914,12 @@ static int sh_mobile_check_var(struct fb_var_screeninfo *var, struct fb_info *in
 {
        struct sh_mobile_lcdc_chan *ch = info->par;
 
-       if (var->xres < 160 || var->xres > 1920 ||
-           var->yres < 120 || var->yres > 1080 ||
-           var->left_margin < 32 || var->left_margin > 320 ||
-           var->right_margin < 12 || var->right_margin > 240 ||
-           var->upper_margin < 12 || var->upper_margin > 120 ||
-           var->lower_margin < 1 || var->lower_margin > 64 ||
-           var->hsync_len < 32 || var->hsync_len > 240 ||
-           var->vsync_len < 2 || var->vsync_len > 64 ||
-           var->pixclock < 6000 || var->pixclock > 40000 ||
+       if (var->xres > MAX_XRES || var->yres > MAX_YRES ||
            var->xres * var->yres * (ch->cfg.bpp / 8) * 2 > info->fix.smem_len) {
-               dev_warn(info->dev, "Invalid info: %u %u %u %u %u %u %u %u %u!\n",
-                        var->xres, var->yres,
-                        var->left_margin, var->right_margin,
-                        var->upper_margin, var->lower_margin,
-                        var->hsync_len, var->vsync_len,
-                        var->pixclock);
+               dev_warn(info->dev, "Invalid info: %u-%u-%u-%u x %u-%u-%u-%u @ %ukHz!\n",
+                        var->left_margin, var->xres, var->right_margin, var->hsync_len,
+                        var->upper_margin, var->yres, var->lower_margin, var->vsync_len,
+                        PICOS2KHZ(var->pixclock));
                return -EINVAL;
        }
        return 0;
@@ -1226,7 +1216,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
                }
 
                if (!mode)
-                       max_size = DEFAULT_XRES * DEFAULT_YRES;
+                       max_size = MAX_XRES * MAX_YRES;
                else if (max_cfg)
                        dev_dbg(&pdev->dev, "Found largest videomode %ux%u\n",
                                max_cfg->xres, max_cfg->yres);
@@ -1238,12 +1228,14 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
                        mode = &default_720p;
                        num_cfg = 1;
                } else {
-                       num_cfg = ch->cfg.num_cfg;
+                       num_cfg = cfg->num_cfg;
                }
 
                fb_videomode_to_modelist(mode, num_cfg, &info->modelist);
 
                fb_videomode_to_var(var, mode);
+               var->width = cfg->lcd_size_cfg.width;
+               var->height = cfg->lcd_size_cfg.height;
                /* Default Y virtual resolution is 2x panel size */
                var->yres_virtual = var->yres * 2;
                var->activate = FB_ACTIVATE_NOW;
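
For reference, the rewritten warning above reports the dot clock through PICOS2KHZ(). A minimal userspace sketch of that conversion, assuming only the usual fbdev convention that pixclock is the pixel period in picoseconds (the sample value is illustrative):

#include <stdio.h>

/* fbdev's pixclock is the pixel period in picoseconds, so the pixel
 * clock in kHz is 10^9 / pixclock -- the same arithmetic as the
 * PICOS2KHZ() macro in <linux/fb.h>. */
#define PICOS2KHZ(a) (1000000000UL / (a))

int main(void)
{
	unsigned long pixclock = 13468;	/* roughly a 74.25 MHz 720p timing */

	printf("%lu ps/pixel -> %lu kHz\n", pixclock, PICOS2KHZ(pixclock));
	return 0;
}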
index 3d77116e463410dac81b6c530c74d5a55f9c7afc..dea7b5bf6e2ccd986cf99840db504d6a38382293 100644 (file)
@@ -642,19 +642,14 @@ static struct notifier_block die_notifier = {
  */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
        /*
         * If nmi_watchdog is turned off then we can turn on
         * our nmi decoding capability.
         */
-       if (!nmi_watchdog_active())
-               hpwdt_nmi_decoding = 1;
-       else
-               dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-                       "functionality you must reboot with nmi_watchdog=0 "
-                       "and load the hpwdt driver with priority=1.\n");
+       hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
        dev_warn(&dev->dev, "NMI decoding is disabled. "
                "Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
index 428f8a1583e8598ae69414f8171e21a3bbbf2ba8..3939e53f5f981d327ff344d522ab59f4df43238b 100644 (file)
@@ -231,7 +231,7 @@ static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
        struct resource *r;
        struct rdc321x_wdt_pdata *pdata;
 
-       pdata = pdev->dev.platform_data;
+       pdata = platform_get_drvdata(pdev);
        if (!pdata) {
                dev_err(&pdev->dev, "no platform data supplied\n");
                return -ENODEV;
index dc963929de652cb997550e38338855823832e53c..981c8477adab6dcde6708d2b8bfde8bf83c8d53b 100644 (file)
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
                               GFP_NOFS);
        if (err)
                goto exit_bh;
+       for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
+               ext4_set_bit(bit, bh->b_data);
 
        ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
                   input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
        err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
        if (err)
                goto exit_bh;
+       for (i = 0, bit = input->inode_table - start;
+            i < sbi->s_itb_per_group; i++, bit++)
+               ext4_set_bit(bit, bh->b_data);
 
        if ((err = extend_or_restart_transaction(handle, 2, bh)))
                goto exit_bh;
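
The two added loops mark the reserved GDT blocks and the new group's inode table as in-use in the block bitmap before it is written out. A small userspace sketch of the same pattern, with set_bit_le() as a non-atomic stand-in for ext4_set_bit() and made-up block numbers:

#include <stdio.h>
#include <string.h>

static void set_bit_le(unsigned long nr, unsigned char *addr)
{
	addr[nr >> 3] |= 1U << (nr & 7);
}

int main(void)
{
	unsigned char bitmap[16];
	unsigned long gdblocks = 2, reserved_gdb = 5;
	unsigned long i, bit;

	memset(bitmap, 0, sizeof(bitmap));
	/* mirror the added loop: mark reserved_gdb blocks starting just
	 * past the group descriptor blocks */
	for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
		set_bit_le(bit, bitmap);

	for (bit = 0; bit < 16; bit++)
		printf("%d", (bitmap[bit >> 3] >> (bit & 7)) & 1);
	printf("\n");
	return 0;
}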
index 5476c066d4ee336733445eda2f804561179ecb41..3c4039d5eef12d1b35ffd93c3f1861e43cc9b520 100644 (file)
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        int metadata;
        unsigned int revokes = 0;
        int x;
-       int error;
+       int error = 0;
 
        if (!*top)
                sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (metadata)
                revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
 
-       error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+       if (ip != GFS2_I(sdp->sd_rindex))
+               error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+       else if (!sdp->sd_rgrps)
+               error = gfs2_ri_update(ip);
+
        if (error)
                return error;
 
@@ -879,7 +883,8 @@ out_rg_gunlock:
 out_rlist:
        gfs2_rlist_free(&rlist);
 out:
-       gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+       if (ip != GFS2_I(sdp->sd_rindex))
+               gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
        return error;
 }
 
index f92c1770416981df8b625b6f918bac6c061c6e5e..08a8beb152e60d6aa4dd0b38ea852973e99263d4 100644 (file)
@@ -541,21 +541,6 @@ out_locked:
        spin_unlock(&gl->gl_spin);
 }
 
-static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
-                                unsigned int req_state,
-                                unsigned int flags)
-{
-       int ret = LM_OUT_ERROR;
-
-       if (!sdp->sd_lockstruct.ls_ops->lm_lock)
-               return req_state == LM_ST_UNLOCKED ? 0 : req_state;
-
-       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-               ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
-                                                        req_state, flags);
-       return ret;
-}
-
 /**
  * do_xmote - Calls the DLM to change the state of a lock
  * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
 
        lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
                      LM_FLAG_PRIORITY);
-       BUG_ON(gl->gl_state == target);
-       BUG_ON(gl->gl_state == gl->gl_target);
+       GLOCK_BUG_ON(gl, gl->gl_state == target);
+       GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
        if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
            glops->go_inval) {
                set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
                do_error(gl, 0); /* Fail queued try locks */
        }
+       gl->gl_req = target;
        spin_unlock(&gl->gl_spin);
        if (glops->go_xmote_th)
                glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
            gl->gl_state == LM_ST_DEFERRED) &&
            !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
                lck_flags |= LM_FLAG_TRY_1CB;
-       ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
 
-       if (!(ret & LM_OUT_ASYNC)) {
-               finish_xmote(gl, ret);
+       if (sdp->sd_lockstruct.ls_ops->lm_lock) {
+               /* lock_dlm */
+               ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
+               GLOCK_BUG_ON(gl, ret);
+       } else { /* lock_nolock */
+               finish_xmote(gl, target);
                if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                        gfs2_glock_put(gl);
-       } else {
-               GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
        }
+
        spin_lock(&gl->gl_spin);
 }
 
@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
 
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
        va_start(args, fmt);
+
        if (seq) {
                struct gfs2_glock_iter *gi = seq->private;
                vsprintf(gi->string, fmt, args);
                seq_printf(seq, gi->string);
        } else {
-               printk(KERN_ERR " ");
-               vprintk(fmt, args);
+               vaf.fmt = fmt;
+               vaf.va = &args;
+
+               printk(KERN_ERR " %pV", &vaf);
        }
+
        va_end(args);
 }
 
@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
  * @gl: Pointer to the glock
  * @ret: The return value from the dlm
  *
+ * The gl_reply field is updated under the gl_spin lock, so it is
+ * safe for it to share a bitfield word with the other glock state fields.
  */
 
 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 {
        struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
 
+       spin_lock(&gl->gl_spin);
        gl->gl_reply = ret;
 
        if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
-               spin_lock(&gl->gl_spin);
                if (gfs2_should_freeze(gl)) {
                        set_bit(GLF_FROZEN, &gl->gl_flags);
                        spin_unlock(&gl->gl_spin);
                        return;
                }
-               spin_unlock(&gl->gl_spin);
        }
+
+       spin_unlock(&gl->gl_spin);
        set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+       smp_wmb();
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put(gl);
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
 static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
 {
        struct task_struct *gh_owner = NULL;
-       char buffer[KSYM_SYMBOL_LEN];
        char flags_buf[32];
 
-       sprint_symbol(buffer, gh->gh_ip);
        if (gh->gh_owner_pid)
                gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
-       gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
-                 state2str(gh->gh_state),
-                 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
-                 gh->gh_error, 
-                 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
-                 gh_owner ? gh_owner->comm : "(ended)", buffer);
+       gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+                      state2str(gh->gh_state),
+                      hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
+                      gh->gh_error,
+                      gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
+                      gh_owner ? gh_owner->comm : "(ended)",
+                      (void *)gh->gh_ip);
        return 0;
 }
 
@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void)
        }
 #endif
 
-       glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER |
+       glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
                                          WQ_HIGHPRI | WQ_FREEZEABLE, 0);
        if (IS_ERR(glock_workqueue))
                return PTR_ERR(glock_workqueue);
-       gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER |
-                                               WQ_FREEZEABLE, 0);
+       gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
+                                               WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+                                               0);
        if (IS_ERR(gfs2_delete_workqueue)) {
                destroy_workqueue(glock_workqueue);
                return PTR_ERR(gfs2_delete_workqueue);
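
The move in gfs2_print_dbg() from printk(KERN_ERR " ") plus vprintk() to a single printk() with %pV means the severity prefix and the message body reach the log in one call. A rough userspace analogue of the idea, formatting the caller's arguments first and emitting once (buffer size chosen arbitrarily):

#include <stdarg.h>
#include <stdio.h>

static void print_dbg(const char *fmt, ...)
{
	char buf[256];
	va_list args;

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	/* prefix and body go out in one write, not two */
	fprintf(stderr, "gfs2:  %s", buf);
}

int main(void)
{
	print_dbg("glock state %d\n", 3);
	return 0;
}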
index db1c26d6d2206c8f9e9b68396380ed8791f3c720..691851ceb6153f59b91cd64d1ce0fecea46904e0 100644 (file)
@@ -87,11 +87,10 @@ enum {
 #define GL_ASYNC               0x00000040
 #define GL_EXACT               0x00000080
 #define GL_SKIP                        0x00000100
-#define GL_ATIME               0x00000200
 #define GL_NOCACHE             0x00000400
   
 /*
- * lm_lock() and lm_async_cb return flags
+ * lm_async_cb return flags
  *
  * LM_OUT_ST_MASK
  * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
  * LM_OUT_CANCELED
  * The lock request was canceled.
  *
- * LM_OUT_ASYNC
- * The result of the request will be returned in an LM_CB_ASYNC callback.
- *
  */
 
 #define LM_OUT_ST_MASK         0x00000003
 #define LM_OUT_CANCELED                0x00000008
-#define LM_OUT_ASYNC           0x00000080
-#define LM_OUT_ERROR           0x00000100
+#define LM_OUT_ERROR           0x00000004
 
 /*
  * lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
        void (*lm_unmount) (struct gfs2_sbd *sdp);
        void (*lm_withdraw) (struct gfs2_sbd *sdp);
        void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
-       unsigned int (*lm_lock) (struct gfs2_glock *gl,
-                                unsigned int req_state, unsigned int flags);
+       int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
+                       unsigned int flags);
        void (*lm_cancel) (struct gfs2_glock *gl);
        const match_table_t *lm_tokens;
 };
 
-#define LM_FLAG_TRY            0x00000001
-#define LM_FLAG_TRY_1CB                0x00000002
-#define LM_FLAG_NOEXP          0x00000004
-#define LM_FLAG_ANY            0x00000008
-#define LM_FLAG_PRIORITY       0x00000010
-
-#define GL_ASYNC               0x00000040
-#define GL_EXACT               0x00000080
-#define GL_SKIP                        0x00000100
-#define GL_NOCACHE             0x00000400
-
-#define GLR_TRYFAILED          13
-
 extern struct workqueue_struct *gfs2_delete_workqueue;
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+
+__attribute__ ((format(printf, 2, 3)))
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
 /**
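
The __attribute__((format(printf, 2, 3))) annotation added above lets the compiler check gfs2_print_dbg() callers' format strings against their arguments. A self-contained sketch of the same annotation on a hypothetical logger:

#include <stdarg.h>
#include <stdio.h>

/* Argument 2 is the printf-style format, varargs begin at argument 3;
 * with -Wformat a mismatched call becomes a compile-time warning. */
__attribute__ ((format(printf, 2, 3)))
static void log_dbg(void *ctx, const char *fmt, ...)
{
	va_list args;

	(void)ctx;
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

int main(void)
{
	log_dbg(NULL, "value %d\n", 42);
	/* log_dbg(NULL, "value %d\n", "oops"); would now warn */
	return 0;
}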
index 0d149dcc04e515adfaaeb632a6677e5e3b555f45..263561bf1a5059b4bf644340faa6a4435c62d14f 100644 (file)
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
 
        if (gl->gl_state != LM_ST_UNLOCKED &&
            test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-               flush_workqueue(gfs2_delete_workqueue);
                gfs2_meta_syncfs(sdp);
                gfs2_log_shutdown(sdp);
        }
index 764fbb49efc8e3adbdeda7f83f178b0fd6ea70f8..8d3d2b4a0a7d64431d63edff082cbedbd5b2543b 100644 (file)
@@ -207,12 +207,14 @@ struct gfs2_glock {
 
        spinlock_t gl_spin;
 
-       unsigned int gl_state;
-       unsigned int gl_target;
-       unsigned int gl_reply;
+       /* State fields protected by gl_spin */
+       unsigned int gl_state:2,        /* Current state */
+                    gl_target:2,       /* Target state */
+                    gl_demote_state:2, /* State requested by remote node */
+                    gl_req:2,          /* State in last dlm request */
+                    gl_reply:8;        /* Last reply from the dlm */
+
        unsigned int gl_hash;
-       unsigned int gl_req;
-       unsigned int gl_demote_state; /* state requested by remote node */
        unsigned long gl_demote_time; /* time of first demote request */
        struct list_head gl_holders;
 
index e1213f7f92179aa2472304ff0db4294be66040d8..14e682dbe8bff4bd4063e4a54a5445cbcfe17937 100644 (file)
@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        if (error)
                return error;
 
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               error = vmtruncate(inode, attr->ia_size);
-               if (error)
-                       return error;
-       }
-
        setattr_copy(inode, attr);
        mark_inode_dirty(inode);
-
-       gfs2_assert_warn(GFS2_SB(inode), !error);
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
index 1c09425b45fd728ba52c1f5f49c3feac187640a2..6e493aee28f82dfb593574f751ed81025207645c 100644 (file)
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
        return lkf;
 }
 
-static unsigned int gdlm_lock(struct gfs2_glock *gl,
-                             unsigned int req_state, unsigned int flags)
+static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
+                    unsigned int flags)
 {
        struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
-       int error;
        int req;
        u32 lkf;
 
-       gl->gl_req = req_state;
        req = make_mode(req_state);
        lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
 
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
         * Submit the actual lock request.
         */
 
-       error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
-                        GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
-       if (error == -EAGAIN)
-               return 0;
-       if (error)
-               return LM_OUT_ERROR;
-       return LM_OUT_ASYNC;
+       return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+                       GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
 static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
index 12cbea7502c26040fb90db5750e764bdd831079a..1db6b73432298d4092c0e8684483b8fff29c78ee 100644 (file)
@@ -1069,7 +1069,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       struct buffer_head *dibh;
        u32 ouid, ogid, nuid, ngid;
        int error;
 
@@ -1100,25 +1099,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
        if (error)
                goto out_gunlock_q;
 
-       error = gfs2_meta_inode_buffer(ip, &dibh);
+       error = gfs2_setattr_simple(ip, attr);
        if (error)
                goto out_end_trans;
 
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               int error;
-
-               error = vmtruncate(inode, attr->ia_size);
-               gfs2_assert_warn(sdp, !error);
-       }
-
-       setattr_copy(inode, attr);
-       mark_inode_dirty(inode);
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(ip, dibh->b_data);
-       brelse(dibh);
-
        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
                u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
                gfs2_quota_change(ip, -blocks, ouid, ogid);
index f606baf9ba7247e9a5fd9ccfb2cc9426019e589e..a689901963dea43c82b6178a4451c09560061e76 100644 (file)
@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                        qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
                        qd->qd_qb.qb_limit = qp->qu_limit;
                }
+               if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
+                       qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_value = qp->qu_value;
+               }
        }
 
        /* Write the quota into the quota file on disk */
@@ -1509,7 +1513,7 @@ out:
 }
 
 /* GFS2 only supports a subset of the XFS fields */
-#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
+#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
 
 static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
                          struct fs_disk_quota *fdq)
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
            ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
                fdq->d_fieldmask ^= FS_DQ_BSOFT;
+
        if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
            ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
                fdq->d_fieldmask ^= FS_DQ_BHARD;
+
+       if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
+           ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
+               fdq->d_fieldmask ^= FS_DQ_BCOUNT;
+
        if (fdq->d_fieldmask == 0)
                goto out_i;
 
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
        .get_dqblk      = gfs2_get_dqblk,
        .set_dqblk      = gfs2_set_dqblk,
 };
-
index 33c8407b876f00ceef0741221ebae4ba46ecb426..7293ea27020c680307e0145e863ebbb7eb0d6949 100644 (file)
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
        for (rgrps = 0;; rgrps++) {
                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 
-               if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode))
+               if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
                        break;
                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
                                           sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
  * Returns: 0 on successful update, error code otherwise
  */
 
-static int gfs2_ri_update(struct gfs2_inode *ip)
+int gfs2_ri_update(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct inode *inode = &ip->i_inode;
@@ -613,46 +613,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        return 0;
 }
 
-/**
- * gfs2_ri_update_special - Pull in a new resource index from the disk
- *
- * This is a special version that's safe to call from gfs2_inplace_reserve_i.
- * In this case we know that we don't have any resource groups in memory yet.
- *
- * @ip: pointer to the rindex inode
- *
- * Returns: 0 on successful update, error code otherwise
- */
-static int gfs2_ri_update_special(struct gfs2_inode *ip)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct inode *inode = &ip->i_inode;
-       struct file_ra_state ra_state;
-       struct gfs2_rgrpd *rgd;
-       unsigned int max_data = 0;
-       int error;
-
-       file_ra_state_init(&ra_state, inode->i_mapping);
-       for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-               /* Ignore partials */
-               if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
-                   i_size_read(inode))
-                       break;
-               error = read_rindex_entry(ip, &ra_state);
-               if (error) {
-                       clear_rgrpdi(sdp);
-                       return error;
-               }
-       }
-       list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
-               if (rgd->rd_data > max_data)
-                       max_data = rgd->rd_data;
-       sdp->sd_max_rg_data = max_data;
-
-       sdp->sd_rindex_uptodate = 1;
-       return 0;
-}
-
 /**
  * gfs2_rindex_hold - Grab a lock on the rindex
  * @sdp: The GFS2 superblock
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
                        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
                else if (!sdp->sd_rgrps) /* We may not have the rindex read
                                            in, so: */
-                       error = gfs2_ri_update_special(ip);
+                       error = gfs2_ri_update(ip);
                if (error)
                        return error;
        }
 
+try_again:
        do {
                error = get_local_rgrp(ip, &last_unlinked);
                /* If there is no space, flushing the log may release some */
-               if (error)
+               if (error) {
+                       if (ip == GFS2_I(sdp->sd_rindex) &&
+                           !sdp->sd_rindex_uptodate) {
+                               error = gfs2_ri_update(ip);
+                               if (error)
+                                       return error;
+                               goto try_again;
+                       }
                        gfs2_log_flush(sdp, NULL);
+               }
        } while (error && tries++ < 3);
 
        if (error) {
index 0e35c0466f9a6c5979a3fe8c339def323bc37fad..50c2bb04369c8dd617fed95513461f6dc3651d0d 100644 (file)
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
+extern int gfs2_ri_update(struct gfs2_inode *ip);
 extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
 extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
 
index 30b58f07c8a6b219fc964efe101ce5f861397885..439b61c03262b767956e23f761b637e0b6905383 100644 (file)
@@ -1296,10 +1296,8 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
-       struct inode *inode = &ip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_ea_location el;
-       struct buffer_head *dibh;
        int error;
 
        error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
        if (error)
                return error;
 
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (error)
-               goto out_trans_end;
-
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               int error;
-
-               error = vmtruncate(inode, attr->ia_size);
-               gfs2_assert_warn(GFS2_SB(inode), !error);
-       }
-
-       setattr_copy(inode, attr);
-       mark_inode_dirty(inode);
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(ip, dibh->b_data);
-       brelse(dibh);
-
-out_trans_end:
+       error = gfs2_setattr_simple(ip, attr);
        gfs2_trans_end(sdp);
        return error;
 }
index f46ee8b0e135eb62d00e45dc30bbee31c61c213a..9da29706f91cd74772169e6391333ae877a1fa05 100644 (file)
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
                super->s_journal_seg[i] = segno;
                super->s_journal_ec[i] = ec;
                logfs_set_segment_reserved(sb, segno);
-               err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+               err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
                BUG_ON(err); /* mempool should prevent this */
                err = logfs_erase_segment(sb, segno, 1);
                BUG_ON(err); /* FIXME: remount-ro would be nicer */
index 6127baf0e1884760e757d656a772cf13d3953be2..ee99a9f5dfd3ac251b6b824a1032ce45b68b6ed7 100644 (file)
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
 
        /* FIXME: transaction is part of logfs_block now.  Is that enough? */
        err = logfs_write_buf(master_inode, page, 0);
+       if (err)
+               move_page_to_inode(inode, page);
+
        logfs_put_write_page(page);
        return err;
 }
index f1e962cb3b73084699a182933b7760926c36880e..0d7c5540ad669a55d4def39f21bbe7b42bfb83cc 100644 (file)
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
        /* this io's submitter should not have unlocked this before we could */
        BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
+       if (ocfs2_iocb_is_sem_locked(iocb)) {
+               up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
+
        ocfs2_iocb_clear_rw_locked(iocb);
 
        level = ocfs2_iocb_rw_locked_level(iocb);
-       if (!level)
-               up_read(&inode->i_alloc_sem);
        ocfs2_rw_unlock(inode, level);
 
        if (is_async)
index 76bfdfda691a03b1790ac9670a8bb79d042e7584..eceb456037c11c7a2e68cc2d510908ef781cfdbf 100644 (file)
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
        else
                clear_bit(1, (unsigned long *)&iocb->private);
 }
+
+/*
+ * A named enum for the lock-type bits stored in iocb->private, which
+ * are used for communication between ocfs2_dio_end_io() and
+ * ocfs2_file_aio_write/read().
+ */
+enum ocfs2_iocb_lock_bits {
+       OCFS2_IOCB_RW_LOCK = 0,
+       OCFS2_IOCB_RW_LOCK_LEVEL,
+       OCFS2_IOCB_SEM,
+       OCFS2_IOCB_NUM_LOCKS
+};
+
 #define ocfs2_iocb_clear_rw_locked(iocb) \
-       clear_bit(0, (unsigned long *)&iocb->private)
+       clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
-       test_bit(1, (unsigned long *)&iocb->private)
+       test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+       set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+       clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+       test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
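
The enum plus the set/clear/test macros pack three independent lock flags into the single pointer-sized iocb->private word. A non-atomic userspace sketch of that layout (the kernel's set_bit()/clear_bit()/test_bit() are atomic; these stand-ins are not):

#include <stdio.h>

enum lock_bits {
	IOCB_RW_LOCK = 0,
	IOCB_RW_LOCK_LEVEL,
	IOCB_SEM,
};

#define set_flag(bit, word)	(*(word) |= 1UL << (bit))
#define clear_flag(bit, word)	(*(word) &= ~(1UL << (bit)))
#define test_flag(bit, word)	(!!(*(word) & (1UL << (bit))))

int main(void)
{
	unsigned long private = 0;	/* stands in for iocb->private */

	set_flag(IOCB_SEM, &private);
	printf("sem locked: %d\n", test_flag(IOCB_SEM, &private));
	clear_flag(IOCB_SEM, &private);
	printf("sem locked: %d\n", test_flag(IOCB_SEM, &private));
	return 0;
}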
index c7fba396392d8c0a866c5194204eb1abb49d827d..6c61771469af24ef9e2610e93e65092768b59691 100644 (file)
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
        define_mask(QUOTA),
        define_mask(REFCOUNT),
        define_mask(BASTS),
+       define_mask(RESERVATIONS),
+       define_mask(CLUSTER),
        define_mask(ERROR),
        define_mask(NOTICE),
        define_mask(KTHREAD),
-       define_mask(RESERVATIONS),
 };
 
 static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
index ea2ed9f56c94ad0654a5f79259e50a1f046bed97..34d6544357d9718699af25e25aecc48a1f26c9c0 100644 (file)
@@ -81,7 +81,7 @@
 #include <linux/sched.h>
 
 /* bits that are frequently given and infrequently matched in the low word */
-/* NOTE: If you add a flag, you need to also update mlog.c! */
+/* NOTE: If you add a flag, you need to also update masklog.c! */
 #define ML_ENTRY       0x0000000000000001ULL /* func call entry */
 #define ML_EXIT                0x0000000000000002ULL /* func call exit */
 #define ML_TCP         0x0000000000000004ULL /* net cluster/tcp.c */
 #define ML_XATTR       0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA       0x0000000040000000ULL /* ocfs2 quota operations */
 #define ML_REFCOUNT    0x0000000080000000ULL /* refcount tree operations */
-#define ML_BASTS       0x0000001000000000ULL /* dlmglue asts and basts */
+#define ML_BASTS       0x0000000100000000ULL /* dlmglue asts and basts */
+#define ML_RESERVATIONS        0x0000000200000000ULL /* ocfs2 alloc reservations */
+#define ML_CLUSTER     0x0000000400000000ULL /* cluster stack */
+
 /* bits that are infrequently given and frequently matched in the high word */
-#define ML_ERROR       0x0000000100000000ULL /* sent to KERN_ERR */
-#define ML_NOTICE      0x0000000200000000ULL /* setn to KERN_NOTICE */
-#define ML_KTHREAD     0x0000000400000000ULL /* kernel thread activity */
-#define ML_RESERVATIONS        0x0000000800000000ULL /* ocfs2 alloc reservations */
-#define ML_CLUSTER     0x0000001000000000ULL /* cluster stack */
+#define ML_ERROR       0x1000000000000000ULL /* sent to KERN_ERR */
+#define ML_NOTICE      0x2000000000000000ULL /* sent to KERN_NOTICE */
+#define ML_KTHREAD     0x4000000000000000ULL /* kernel thread activity */
 
 #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
 #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
index c49f6de0e7abb6e096ddc56e795957e0194bb8dd..d417b3f9b0c730e5cbb8d475358e6d8891fdc05c 100644 (file)
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
        di->i_dx_root = cpu_to_le64(dr_blkno);
 
+       spin_lock(&OCFS2_I(dir)->ip_lock);
        OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
        di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+       spin_unlock(&OCFS2_I(dir)->ip_lock);
 
        ocfs2_journal_dirty(handle, di_bh);
 
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
                goto out_commit;
        }
 
+       spin_lock(&OCFS2_I(dir)->ip_lock);
        OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
        di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+       spin_unlock(&OCFS2_I(dir)->ip_lock);
        di->i_dx_root = cpu_to_le64(0ULL);
 
        ocfs2_journal_dirty(handle, di_bh);
index f564b0e5f80d8c89e08eeba4a5e133b24cb67373..59f0f6bdfc62110141ca7ad413dc51abadd4b3c8 100644 (file)
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
  */
 static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
                                      struct dlm_lock_resource *res,
-                                     int *numlocks)
+                                     int *numlocks,
+                                     int *hasrefs)
 {
        int ret;
        int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 
        assert_spin_locked(&res->spinlock);
 
+       *numlocks = 0;
+       *hasrefs = 0;
+
        ret = -EINVAL;
        if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
                mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
        }
 
        *numlocks = count;
-       mlog(0, "migrateable lockres having %d locks\n", *numlocks);
+
+       count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+       if (count < O2NM_MAX_NODES)
+               *hasrefs = 1;
+
+       mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
+            res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
 
 leave:
        return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        const char *name;
        unsigned int namelen;
        int mle_added = 0;
-       int numlocks;
+       int numlocks, hasrefs;
        int wake = 0;
 
        if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        name = res->lockname.name;
        namelen = res->lockname.len;
 
-       mlog(0, "migrating %.*s to %u\n", namelen, name, target);
+       mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
 
        /*
         * ensure this lockres is a proper candidate for migration
         */
        spin_lock(&res->spinlock);
-       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
        if (ret < 0) {
                spin_unlock(&res->spinlock);
                goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        spin_unlock(&res->spinlock);
 
        /* no work to do */
-       if (numlocks == 0) {
-               mlog(0, "no locks were found on this lockres! done!\n");
+       if (numlocks == 0 && !hasrefs)
                goto leave;
-       }
 
        /*
         * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
         * find a node to migrate the lockres to
         */
 
-       mlog(0, "picking a migration node\n");
        spin_lock(&dlm->spinlock);
        /* pick a new node */
        if (!test_bit(target, dlm->domain_map) ||
            target >= O2NM_MAX_NODES) {
                target = dlm_pick_migration_target(dlm, res);
        }
-       mlog(0, "node %u chosen for migration\n", target);
+       mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
+            namelen, name, target);
 
        if (target >= O2NM_MAX_NODES ||
            !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
        int ret;
        int lock_dropped = 0;
-       int numlocks;
+       int numlocks, hasrefs;
 
        spin_lock(&res->spinlock);
        if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
        }
 
        /* No need to migrate a lockres having no locks */
-       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
-       if (ret >= 0 && numlocks == 0) {
+       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
+       if (ret >= 0 && numlocks == 0 && !hasrefs) {
                spin_unlock(&res->spinlock);
                goto leave;
        }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
                }
                queue++;
        }
+
+       nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+       if (nodenum < O2NM_MAX_NODES) {
+               spin_unlock(&res->spinlock);
+               return nodenum;
+       }
        spin_unlock(&res->spinlock);
        mlog(0, "have not found a suitable target yet! checking domain map\n");
 
index 77b4c04a2809831e7209d1be3f8e860a0ba7edc8..f6cba566429d314fe82f9f433889d1e22111462a 100644 (file)
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 
        mutex_lock(&inode->i_mutex);
 
+       ocfs2_iocb_clear_sem_locked(iocb);
+
 relock:
        /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
        if (direct_io) {
                down_read(&inode->i_alloc_sem);
                have_alloc_sem = 1;
+               /* communicate with ocfs2_dio_end_io */
+               ocfs2_iocb_set_sem_locked(iocb);
        }
 
        /*
@@ -2382,8 +2386,10 @@ out:
                ocfs2_rw_unlock(inode, rw_level);
 
 out_sems:
-       if (have_alloc_sem)
+       if (have_alloc_sem) {
                up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
 
        mutex_unlock(&inode->i_mutex);
 
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
                goto bail;
        }
 
+       ocfs2_iocb_clear_sem_locked(iocb);
+
        /*
         * buffered reads protect themselves in ->readpage().  O_DIRECT reads
         * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        if (filp->f_flags & O_DIRECT) {
                down_read(&inode->i_alloc_sem);
                have_alloc_sem = 1;
+               ocfs2_iocb_set_sem_locked(iocb);
 
                ret = ocfs2_rw_lock(inode, 0);
                if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        }
 
 bail:
-       if (have_alloc_sem)
+       if (have_alloc_sem) {
                up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
        if (rw_level != -1)
                ocfs2_rw_unlock(inode, rw_level);
        mlog_exit(ret);
index c2e4f8222e2f0ac707de217555bee7edd51f76d6..bf2e7764920e92e3d621a94caac7124b3fa15914 100644 (file)
@@ -350,7 +350,7 @@ enum {
 #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
        NUM_SYSTEM_INODES
 };
-#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE
+#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
 #define NUM_LOCAL_SYSTEM_INODES        \
                (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
 
index 182845147fe45bde8f5607a799f23cc1e2818117..08cba2c3b61240e085b9861967af5bd0adb4d227 100644 (file)
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
 
 #endif
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+/*
+ * Print out autogroup-related information:
+ */
+static int sched_autogroup_show(struct seq_file *m, void *v)
+{
+       struct inode *inode = m->private;
+       struct task_struct *p;
+
+       p = get_proc_task(inode);
+       if (!p)
+               return -ESRCH;
+       proc_sched_autogroup_show_task(p, m);
+
+       put_task_struct(p);
+
+       return 0;
+}
+
+static ssize_t
+sched_autogroup_write(struct file *file, const char __user *buf,
+           size_t count, loff_t *offset)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct task_struct *p;
+       char buffer[PROC_NUMBUF];
+       long nice;
+       int err;
+
+       memset(buffer, 0, sizeof(buffer));
+       if (count > sizeof(buffer) - 1)
+               count = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, buf, count))
+               return -EFAULT;
+
+       err = strict_strtol(strstrip(buffer), 0, &nice);
+       if (err)
+               return -EINVAL;
+
+       p = get_proc_task(inode);
+       if (!p)
+               return -ESRCH;
+
+       err = nice;
+       err = proc_sched_autogroup_set_nice(p, &err);
+       if (err)
+               count = err;
+
+       put_task_struct(p);
+
+       return count;
+}
+
+static int sched_autogroup_open(struct inode *inode, struct file *filp)
+{
+       int ret;
+
+       ret = single_open(filp, sched_autogroup_show, NULL);
+       if (!ret) {
+               struct seq_file *m = filp->private_data;
+
+               m->private = inode;
+       }
+       return ret;
+}
+
+static const struct file_operations proc_pid_sched_autogroup_operations = {
+       .open           = sched_autogroup_open,
+       .read           = seq_read,
+       .write          = sched_autogroup_write,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
+
 static ssize_t comm_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *offset)
 {
@@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = {
        INF("limits",     S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
+#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+       REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
 #endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
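
The new /proc/<pid>/autogroup file reads back the task's autogroup and its nice value, and accepts a new nice level on write. A hypothetical userspace usage sketch, assuming a kernel built with CONFIG_SCHED_AUTOGROUP and sufficient privilege to renice:

#include <stdio.h>

int main(void)
{
	FILE *f;
	char line[64];

	f = fopen("/proc/self/autogroup", "r");
	if (f) {
		if (fgets(line, sizeof(line), f))
			printf("before: %s", line);
		fclose(f);
	}

	f = fopen("/proc/self/autogroup", "w");
	if (!f) {
		perror("autogroup");
		return 1;
	}
	fprintf(f, "10\n");	/* parsed via strict_strtol() as a nice level */
	return fclose(f) ? 1 : 0;
}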
index 36d57f74cd01c6c126ee2f7c2ea2c98f66868b2f..51494e6b55487f30496c8870165dd75f8ba4c7b1 100644 (file)
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
                                                   unsigned long timeout);
-extern unsigned long wait_for_completion_interruptible_timeout(
-                       struct completion *x, unsigned long timeout);
-extern unsigned long wait_for_completion_killable_timeout(
-                       struct completion *x, unsigned long timeout);
+extern long wait_for_completion_interruptible_timeout(
+       struct completion *x, unsigned long timeout);
+extern long wait_for_completion_killable_timeout(
+       struct completion *x, unsigned long timeout);
 extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
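
The switch from unsigned long to long matters because these calls fold three outcomes into one signed value: negative on interruption, zero on timeout, positive for the jiffies remaining. A hedged in-kernel sketch of a caller, written as a throwaway module (names and timings invented):

#include <linux/module.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/delay.h>

static DECLARE_COMPLETION(demo_done);

static void demo_work_fn(struct work_struct *w)
{
	msleep(100);
	complete(&demo_done);
}
static DECLARE_WORK(demo_work, demo_work_fn);

static int __init demo_init(void)
{
	long ret;

	schedule_work(&demo_work);
	/* signed return: <0 on signal, 0 on timeout, >0 jiffies left */
	ret = wait_for_completion_interruptible_timeout(&demo_done, HZ);
	if (ret < 0)
		pr_info("interrupted: %ld\n", ret);
	else if (ret == 0)
		pr_info("timed out\n");
	else
		pr_info("done with %ld jiffies to spare\n", ret);
	return 0;
}

static void __exit demo_exit(void)
{
	flush_work(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");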
 
index 9d8688b92d8b02c46980f9a4397e4b2d0c313413..8cd00ad98d3773a4afa44e5e7ad6eaac184e6d71 100644 (file)
@@ -824,6 +824,8 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 #ifdef CONFIG_DMA_ENGINE
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
+void dma_release_channel(struct dma_chan *chan);
 #else
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
@@ -831,7 +833,14 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 }
 static inline void dma_issue_pending_all(void)
 {
-       do { } while (0);
+}
+static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
+                                             dma_filter_fn fn, void *fn_param)
+{
+       return NULL;
+}
+static inline void dma_release_channel(struct dma_chan *chan)
+{
 }
 #endif
 
@@ -842,8 +851,6 @@ void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
-void dma_release_channel(struct dma_chan *chan);
 
 /* --- Helper iov-locking functions --- */
 
index 8beabb958f61d5147c8893f1e780415a91fcb2e6..47e3997f7b5cf39233283ff43d84937daa502c2f 100644 (file)
@@ -154,12 +154,14 @@ enum {
        TRACE_EVENT_FL_ENABLED_BIT,
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_RECORDED_CMD_BIT,
+       TRACE_EVENT_FL_CAP_ANY_BIT,
 };
 
 enum {
        TRACE_EVENT_FL_ENABLED          = (1 << TRACE_EVENT_FL_ENABLED_BIT),
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_RECORDED_CMD     = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
+       TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 };
 
 struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
 #endif
 };
 
+#define __TRACE_EVENT_FLAGS(name, value)                               \
+       static int __init trace_init_flags_##name(void)                 \
+       {                                                               \
+               event_##name.flags = value;                             \
+               return 0;                                               \
+       }                                                               \
+       early_initcall(trace_init_flags_##name);
+
 #define PERF_MAX_TRACE_SIZE    2048
 
 #define MAX_FILTER_PRED                32
@@ -215,6 +225,10 @@ enum {
        FILTER_PTR_STRING,
 };
 
+#define EVENT_STORAGE_SIZE 128
+extern struct mutex event_storage_mutex;
+extern char event_storage[EVENT_STORAGE_SIZE];
+
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
                              const char *name, int offset, int size,
index fd0c1b857d3dbcd9c074e461ded81b69f3ab897d..330586ffffbbccad534b1f81b7309d766f8d48fb 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/wait.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
-
+#include <linux/timerqueue.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -79,8 +79,8 @@ enum hrtimer_restart {
 
 /**
  * struct hrtimer - the basic hrtimer structure
- * @node:      red black tree node for time ordered insertion
- * @_expires:  the absolute expiry time in the hrtimers internal
+ * @node:      timerqueue node, which also manages node.expires,
+ *             the absolute expiry time in the hrtimer's internal
  *             representation. The time is related to the clock on
  *             which the timer is based. Is setup by adding
  *             slack to the _softexpires value. For non range timers
@@ -101,8 +101,7 @@ enum hrtimer_restart {
  * The hrtimer structure must be initialized by hrtimer_init()
  */
 struct hrtimer {
-       struct rb_node                  node;
-       ktime_t                         _expires;
+       struct timerqueue_node          node;
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
@@ -141,8 +140,7 @@ struct hrtimer_sleeper {
 struct hrtimer_clock_base {
        struct hrtimer_cpu_base *cpu_base;
        clockid_t               index;
-       struct rb_root          active;
-       struct rb_node          *first;
+       struct timerqueue_head  active;
        ktime_t                 resolution;
        ktime_t                 (*get_time)(void);
        ktime_t                 softirq_time;
@@ -158,7 +156,6 @@ struct hrtimer_clock_base {
  * @lock:              lock protecting the base and associated clock bases
  *                     and timers
  * @clock_base:                array of clock bases for this cpu
- * @curr_timer:                the timer which is executing a callback right now
  * @expires_next:      absolute time of the next event which was scheduled
  *                     via clock_set_next_event()
  * @hres_active:       State of high resolution mode
@@ -184,43 +181,43 @@ struct hrtimer_cpu_base {
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
-       timer->_expires = time;
+       timer->node.expires = time;
        timer->_softexpires = time;
 }
 
 static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
 {
        timer->_softexpires = time;
-       timer->_expires = ktime_add_safe(time, delta);
+       timer->node.expires = ktime_add_safe(time, delta);
 }
 
 static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
 {
        timer->_softexpires = time;
-       timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
+       timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
 }
 
 static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
 {
-       timer->_expires.tv64 = tv64;
+       timer->node.expires.tv64 = tv64;
        timer->_softexpires.tv64 = tv64;
 }
 
 static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
 {
-       timer->_expires = ktime_add_safe(timer->_expires, time);
+       timer->node.expires = ktime_add_safe(timer->node.expires, time);
        timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
 }
 
 static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
 {
-       timer->_expires = ktime_add_ns(timer->_expires, ns);
+       timer->node.expires = ktime_add_ns(timer->node.expires, ns);
        timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
 }
 
 static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
 {
-       return timer->_expires;
+       return timer->node.expires;
 }
 
 static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
@@ -230,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
 {
-       return timer->_expires.tv64;
+       return timer->node.expires.tv64;
 }
 static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 {
@@ -239,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
 {
-       return ktime_to_ns(timer->_expires);
+       return ktime_to_ns(timer->node.expires);
 }
 
 static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 {
-    return ktime_sub(timer->_expires, timer->base->get_time());
+       return ktime_sub(timer->node.expires, timer->base->get_time());
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
index 1f8c06ce0fa66b83760863735eaf1209908205d7..caa151fbebb74c661289a69ffb52762435178d53 100644 (file)
 #include <linux/securebits.h>
 #include <net/net_namespace.h>
 
+#ifdef CONFIG_SMP
+# define INIT_PUSHABLE_TASKS(tsk)                                      \
+       .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
+#else
+# define INIT_PUSHABLE_TASKS(tsk)
+#endif
+
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
@@ -83,6 +90,12 @@ extern struct group_info init_groups;
  */
 # define CAP_INIT_BSET  CAP_FULL_SET
 
+#ifdef CONFIG_RCU_BOOST
+#define INIT_TASK_RCU_BOOST()                                          \
+       .rcu_boost_mutex = NULL,
+#else
+#define INIT_TASK_RCU_BOOST()
+#endif
 #ifdef CONFIG_TREE_PREEMPT_RCU
 #define INIT_TASK_RCU_TREE_PREEMPT()                                   \
        .rcu_blocked_node = NULL,
@@ -94,7 +107,8 @@ extern struct group_info init_groups;
        .rcu_read_lock_nesting = 0,                                     \
        .rcu_read_unlock_special = 0,                                   \
        .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),           \
-       INIT_TASK_RCU_TREE_PREEMPT()
+       INIT_TASK_RCU_TREE_PREEMPT()                                    \
+       INIT_TASK_RCU_BOOST()
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif
@@ -137,7 +151,7 @@ extern struct cred init_cred;
                .nr_cpus_allowed = NR_CPUS,                             \
        },                                                              \
        .tasks          = LIST_HEAD_INIT(tsk.tasks),                    \
-       .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
+       INIT_PUSHABLE_TASKS(tsk)                                        \
        .ptraced        = LIST_HEAD_INIT(tsk.ptraced),                  \
        .ptrace_entry   = LIST_HEAD_INIT(tsk.ptrace_entry),             \
        .real_parent    = &tsk,                                         \
index 79d0c4f6d0719452c20494b1439d0d695e212e90..55e0d4253e4927eb67254f38137b2a9e787afa9d 100644 (file)
@@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
 struct irqaction {
        irq_handler_t handler;
        unsigned long flags;
-       const char *name;
        void *dev_id;
        struct irqaction *next;
        int irq;
-       struct proc_dir_entry *dir;
        irq_handler_t thread_fn;
        struct task_struct *thread;
        unsigned long thread_flags;
-};
+       const char *name;
+       struct proc_dir_entry *dir;
+} ____cacheline_internodealigned_in_smp;
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
index e7d1b2e0070d3570b7022877a79fa2f0ed081507..b78edb58ee66164e756b4789baf71ab86e8684c4 100644 (file)
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
 extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+                                   struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
index 685ea65eb803fddf1c47ee3e420fe3056278685b..ce0775aa64c376b8980a6af70ef59f14d19f1ba0 100644 (file)
@@ -81,16 +81,41 @@ struct kthread_work {
 #define DEFINE_KTHREAD_WORK(work, fn)                                  \
        struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
 
-static inline void init_kthread_worker(struct kthread_worker *worker)
-{
-       *worker = (struct kthread_worker)KTHREAD_WORKER_INIT(*worker);
-}
-
-static inline void init_kthread_work(struct kthread_work *work,
-                                    kthread_work_func_t fn)
-{
-       *work = (struct kthread_work)KTHREAD_WORK_INIT(*work, fn);
-}
+/*
+ * kthread_worker.lock and kthread_work.done need their own lockdep class
+ * keys if they are defined on stack with lockdep enabled.  Use the
+ * following macros when defining them on stack.
+ */
+#ifdef CONFIG_LOCKDEP
+# define KTHREAD_WORKER_INIT_ONSTACK(worker)                           \
+       ({ init_kthread_worker(&worker); worker; })
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker)                         \
+       struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
+# define KTHREAD_WORK_INIT_ONSTACK(work, fn)                           \
+       ({ init_kthread_work((&work), fn); work; })
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn)                         \
+       struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn)
+#else
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn)
+#endif
+
+extern void __init_kthread_worker(struct kthread_worker *worker,
+                       const char *name, struct lock_class_key *key);
+
+#define init_kthread_worker(worker)                                    \
+       do {                                                            \
+               static struct lock_class_key __key;                     \
+               __init_kthread_worker((worker), "("#worker")->lock", &__key); \
+       } while (0)
+
+#define init_kthread_work(work, fn)                                    \
+       do {                                                            \
+               memset((work), 0, sizeof(struct kthread_work));         \
+               INIT_LIST_HEAD(&(work)->node);                          \
+               (work)->func = (fn);                                    \
+               init_waitqueue_head(&(work)->done);                     \
+       } while (0)
 
 int kthread_worker_fn(void *worker_ptr);
 
index 7575bbbdf2a2b8e6a716fb6252c3d8e958756f52..8b17fd8c790d8601f8aff0a33c7b909984e30545 100644 (file)
@@ -308,6 +308,9 @@ struct module
        /* The size of the executable code in each section.  */
        unsigned int init_text_size, core_text_size;
 
+       /* Size of RO sections of the module (text+rodata) */
+       unsigned int init_ro_size, core_ro_size;
+
        /* Arch-specific module values */
        struct mod_arch_specific arch;
 
@@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
 {
        return 0;
 }
-
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
@@ -687,6 +689,13 @@ extern int module_sysfs_initialized;
 
 #define __MODULE_STRING(x) __stringify(x)
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+extern void set_all_modules_text_rw(void);
+extern void set_all_modules_text_ro(void);
+#else
+static inline void set_all_modules_text_rw(void) { }
+static inline void set_all_modules_text_ro(void) { }
+#endif
 
 #ifdef CONFIG_GENERIC_BUG
 void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
index f363bc8fdc74c821c99aa59d5bfcb9554c012c9a..94b48bd40dd735f77963fcd31797d32bb68b3379 100644 (file)
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
+#define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 #endif
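
The hunk above gives every architecture a default arch_mutex_cpu_relax() that falls back to cpu_relax(), while an architecture selecting CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX (s390 in this merge) can supply a cheaper pause. A hedged sketch of the kind of spin-wait the hook is meant for; the helper is hypothetical, the real caller being the mutex spin-on-owner path:

#include <linux/mutex.h>

static void spin_until_unlocked(struct mutex *lock)
{
        while (mutex_is_locked(lock))
                arch_mutex_cpu_relax();         /* arch-tunable pause */
}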
index 123566912d7312f276bf975cd1a48c97b2ffa830..e2b9e63afa68b53f45fdc60c1a3f76cf0f338705 100644 (file)
@@ -70,7 +70,7 @@ struct nlmsghdr {
    Check               NLM_F_EXCL
  */
 
-#define NLMSG_ALIGNTO  4
+#define NLMSG_ALIGNTO  4U
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
 #define NLMSG_HDRLEN    ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
 #define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
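
Making NLMSG_ALIGNTO unsigned (4U) turns NLMSG_ALIGN() into an unsigned expression, so length arithmetic built on it no longer mixes signedness when compared against sizes. A quick illustration of the resulting values, assuming the usual 16-byte struct nlmsghdr:

size_t a = NLMSG_ALIGN(5);      /* rounds 5 up to 8 */
size_t l = NLMSG_LENGTH(5);     /* aligned header (16) + 5 == 21 */
size_t s = NLMSG_SPACE(5);      /* NLMSG_LENGTH(5) aligned up == 24 */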
index 06aab5eee134cd56c4bade9005912fa3a785a327..c536f8545f74c11e345943187f201bfc25e48baa 100644 (file)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
-extern void acpi_nmi_disable(void);
-extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
        touch_softlockup_watchdog();
 }
-#else
-extern void touch_nmi_watchdog(void);
-#endif
-static inline void acpi_nmi_disable(void) { }
-static inline void acpi_nmi_enable(void) { }
 #endif
 
 /*
index 4f1279e105ee143e4317219b3cb093bc8bbdd954..dda5b0a3ff6014b8a0741a186ed0e3968b63d298 100644 (file)
@@ -215,8 +215,9 @@ struct perf_event_attr {
                                 */
                                precise_ip     :  2, /* skid constraint       */
                                mmap_data      :  1, /* non-exec mmap data    */
+                               sample_id_all  :  1, /* sample_type all events */
 
-                               __reserved_1   : 46;
+                               __reserved_1   : 45;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
 enum perf_event_type {
 
        /*
+        * If perf_event_attr.sample_id_all is set then all event types will
+        * have the sample_type selected fields related to where/when an
+        * event took place (TID, TIME, ID, CPU, STREAM_ID), as described
+        * for PERF_RECORD_SAMPLE below. These fields are stashed just after
+        * the perf_event_header and the fields already present for the
+        * existing event types, i.e. at the end of the payload. That way a
+        * newer perf.data file will be supported by older perf tools, with
+        * these new optional fields being ignored.
+        *
         * The MMAP events record the PROT_EXEC mappings so that we can
         * correlate userspace IPs to code. They have the following structure:
         *
@@ -578,6 +588,10 @@ struct perf_event;
 struct pmu {
        struct list_head                entry;
 
+       struct device                   *dev;
+       char                            *name;
+       int                             type;
+
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
        int                             task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
        u64                             shadow_ctx_time;
 
        struct perf_event_attr          attr;
+       u16                             header_size;
+       u16                             id_header_size;
+       u16                             read_size;
        struct hw_perf_event            hw;
 
        struct perf_event_context       *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
 
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
+static inline bool is_sampling_event(struct perf_event *event)
+{
+       return event->attr.sample_period != 0;
+}
+
 /*
  * Return 1 for a software event, 0 for a hardware event
  */
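
With perf_pmu_register() now taking a name and a type, a PMU registers under a visible name and can ask the core for a dynamically allocated type id by passing -1. A hedged sketch of the changed call, with the callbacks elided and all names illustrative:

static struct pmu my_pmu = {
        /* .event_init, .add, .del, .start, .stop, .read elided */
};

static int __init my_pmu_init(void)
{
        /* -1: let the core allocate a dynamic PMU type id; a fixed
         * PERF_TYPE_* constant may be passed instead */
        return perf_pmu_register(&my_pmu, "my_pmu", -1);
}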
index f31ef61f1c650b585bd6faf969f7cec754dffe2d..2dea94fc44026a1048f912913be298b8df179873 100644 (file)
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_entry_rcu(ptr, type, member) \
        list_entry_rcu((ptr)->next, type, member)
 
-#define __list_for_each_rcu(pos, head) \
-       for (pos = rcu_dereference_raw(list_next_rcu(head)); \
-               pos != (head); \
-               pos = rcu_dereference_raw(list_next_rcu((pos)))
-
 /**
  * list_for_each_entry_rcu     -       iterate over rcu list of given type
  * @pos:       the type * to use as a loop cursor.
index 03cda7bed98587b128c5a9953316644a8debb4d2..af5614856285d32e0f07d3ca7e7294b03b9b27b7 100644 (file)
@@ -47,6 +47,8 @@
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#define UINT_CMP_GE(a, b)      (UINT_MAX / 2 >= (a) - (b))
+#define UINT_CMP_LT(a, b)      (UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)     (ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)     (ULONG_MAX / 2 < (a) - (b))
 
@@ -66,7 +68,6 @@ extern void call_rcu_sched(struct rcu_head *head,
 extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
-extern void synchronize_sched_expedited(void);
 extern int sched_expedited_torture_stats(char *page);
 
 static inline void __rcu_read_lock_bh(void)
@@ -118,7 +119,6 @@ static inline int rcu_preempt_depth(void)
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
 /* Internal to kernel */
-extern void rcu_init(void);
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern void rcu_check_callbacks(int cpu, int user);
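
UINT_CMP_GE()/UINT_CMP_LT() mirror the existing ULONG variants: they order counters modulo 2^32, so the comparison stays correct across wraparound as long as the two values are within UINT_MAX/2 of each other. A sketch of the intended use in a hypothetical grace-period helper:

static inline bool gp_caught_up(unsigned int completed, unsigned int snap)
{
        /*
         * True iff 'completed' has reached 'snap', even across a wrap:
         * completed = 5, snap = UINT_MAX - 4 gives completed - snap == 10
         * (mod 2^32), well under UINT_MAX/2, so 5 still counts as later.
         */
        return UINT_CMP_GE(completed, snap);
}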
index 13877cb93a6000043f11a6704f2d90b0cc04552d..30ebd7c8d874b4dfeb9c9c9e5c5e857fbb43ab62 100644 (file)
@@ -27,7 +27,9 @@
 
 #include <linux/cache.h>
 
-#define rcu_init_sched()       do { } while (0)
+static inline void rcu_init(void)
+{
+}
 
 #ifdef CONFIG_TINY_RCU
 
@@ -58,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void)
        synchronize_sched();
 }
 
+static inline void synchronize_sched_expedited(void)
+{
+       synchronize_sched();
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
@@ -125,16 +132,12 @@ static inline void rcu_cpu_stall_reset(void)
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 extern int rcu_scheduler_active __read_mostly;
 extern void rcu_scheduler_starting(void);
-
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
 static inline void rcu_scheduler_starting(void)
 {
 }
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #endif /* __LINUX_RCUTINY_H */
index 95518e6287946177e0eceb5cbf201ebfcaf0e072..3a933482734aeccbafc7a0bb735be11ede47cbd2 100644 (file)
@@ -30,6 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
+extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern void rcu_cpu_stall_reset(void);
@@ -47,6 +48,7 @@ static inline void exit_rcu(void)
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 extern void synchronize_rcu_bh(void);
+extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
 static inline void synchronize_rcu_bh_expedited(void)
index 223874538b33208e3c5ff11710f3161d58b4aef2..777cd01e240ee0fca7a8b6a76c74137d0dcfaaa9 100644 (file)
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
                                  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
+void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
 {
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
+static inline void lockup_detector_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -509,6 +513,8 @@ struct thread_group_cputimer {
        spinlock_t lock;
 };
 
+struct autogroup;
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -576,6 +582,9 @@ struct signal_struct {
 
        struct tty_struct *tty; /* NULL if no tty */
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+       struct autogroup *autogroup;
+#endif
        /*
         * Cumulative resource counters for dead threads in the group,
         * and for reaped dead child processes forked by this group.
@@ -1229,13 +1238,18 @@ struct task_struct {
 #ifdef CONFIG_TREE_PREEMPT_RCU
        struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+       struct rt_mutex *rcu_boost_mutex;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        struct sched_info sched_info;
 #endif
 
        struct list_head tasks;
+#ifdef CONFIG_SMP
        struct plist_node pushable_tasks;
+#endif
 
        struct mm_struct *mm, *active_mm;
 #if defined(SPLIT_RSS_COUNTING)
@@ -1759,7 +1773,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
@@ -1767,7 +1782,10 @@ static inline void rcu_copy_process(struct task_struct *p)
        p->rcu_read_unlock_special = 0;
 #ifdef CONFIG_TREE_PREEMPT_RCU
        p->rcu_blocked_node = NULL;
-#endif
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+       p->rcu_boost_mutex = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
        INIT_LIST_HEAD(&p->rcu_node_entry);
 }
 
@@ -1872,14 +1890,11 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
 #endif
 
-extern void sched_idle_next(void);
-
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
 extern void wake_up_idle_cpu(int cpu);
 #else
@@ -1889,8 +1904,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
@@ -1906,6 +1919,7 @@ extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *length,
@@ -1931,6 +1945,24 @@ int sched_rt_handler(struct ctl_table *table, int write,
 
 extern unsigned int sysctl_sched_compat_yield;
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#ifdef CONFIG_PROC_FS
+extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+#endif
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
@@ -1949,9 +1981,10 @@ extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern int sched_setscheduler(struct task_struct *, int,
+                             const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
-                                     struct sched_param *);
+                                     const struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
index 7f770c638e99d670840ed99856dce0b07a660f8b..fe817918b30e49ff96648394d99d8fc85eebaac8 100644 (file)
@@ -77,6 +77,8 @@
 #define SFI_OEM_ID_SIZE                6
 #define SFI_OEM_TABLE_ID_SIZE  8
 
+#define SFI_NAME_LEN           16
+
 #define SFI_SYST_SEARCH_BEGIN          0x000E0000
 #define SFI_SYST_SEARCH_END            0x000FFFFF
 
@@ -156,13 +158,13 @@ struct sfi_device_table_entry {
        u16     addr;
        u8      irq;
        u32     max_freq;
-       char    name[16];
+       char    name[SFI_NAME_LEN];
 } __packed;
 
 struct sfi_gpio_table_entry {
-       char    controller_name[16];
+       char    controller_name[SFI_NAME_LEN];
        u16     pin_no;
-       char    pin_name[16];
+       char    pin_name[SFI_NAME_LEN];
 } __packed;
 
 typedef int (*sfi_table_handler) (struct sfi_table_header *table);
index 51efbef38fb0e204cfddb61b56619d52cefab623..25310f1d7f3773c540e51e7103a4edfa98db7a33 100644 (file)
@@ -2,6 +2,7 @@
 #define __LINUX_STACKTRACE_H
 
 struct task_struct;
+struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
 struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
+extern void save_stack_trace_regs(struct stack_trace *trace,
+                                 struct pt_regs *regs);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
                                struct stack_trace *trace);
 
index cacc27a0e285163d9a8727a4131ffed478b8f46c..18cd0684fc4ec4bb2e6fb52ed6a7838737688c17 100644 (file)
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)                               \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_enter_##sname;            \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_enter,   \
                .event.funcs            = &enter_syscall_print_funcs,   \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)                                        \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_exit_##sname;             \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_exit,    \
                .event.funcs            = &exit_syscall_print_funcs,    \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)                            \
        SYSCALL_TRACE_ENTER_EVENT(sname);                       \
index 341dddb55090853430c39bd76a081bc2125c8fe5..2466e550a41d40beefe6aa1b5ed4560ac076bc29 100644 (file)
@@ -33,7 +33,7 @@
  */
 
 
-#define TASKSTATS_VERSION      7
+#define TASKSTATS_VERSION      8
 #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                         * in linux/sched.h */
 
@@ -188,6 +188,7 @@ enum {
        TASKSTATS_TYPE_STATS,           /* taskstats structure */
        TASKSTATS_TYPE_AGGR_PID,        /* contains pid + stats */
        TASKSTATS_TYPE_AGGR_TGID,       /* contains tgid + stats */
+       TASKSTATS_TYPE_NULL,            /* contains nothing */
        __TASKSTATS_TYPE_MAX,
 };
 
index 38cf093ef62c745d9f06e1038127ef61834796a1..6abd9138beda57f7555b96b9fa0d51c60edaa50f 100644 (file)
@@ -24,9 +24,9 @@ struct timer_list {
        int slack;
 
 #ifdef CONFIG_TIMER_STATS
+       int start_pid;
        void *start_site;
        char start_comm[16];
-       int start_pid;
 #endif
 #ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
@@ -48,12 +48,38 @@ extern struct tvec_base boot_tvec_bases;
 #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
 #endif
 
+/*
+ * Note that all tvec_bases are 2 byte aligned, so the low bit of the
+ * base pointer in timer_list is guaranteed to be zero.  That LSB is
+ * used to indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
+ */
+#define TBASE_DEFERRABLE_FLAG          (0x1)
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {                \
                .entry = { .prev = TIMER_ENTRY_STATIC },        \
                .function = (_function),                        \
                .expires = (_expires),                          \
                .data = (_data),                                \
                .base = &boot_tvec_bases,                       \
+               .slack = -1,                                    \
+               __TIMER_LOCKDEP_MAP_INITIALIZER(                \
+                       __FILE__ ":" __stringify(__LINE__))     \
+       }
+
+#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *)         \
+                 ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))
+
+#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\
+               .entry = { .prev = TIMER_ENTRY_STATIC },        \
+               .function = (_function),                        \
+               .expires = (_expires),                          \
+               .data = (_data),                                \
+               .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases),  \
                __TIMER_LOCKDEP_MAP_INITIALIZER(                \
                        __FILE__ ":" __stringify(__LINE__))     \
        }
@@ -248,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
 
 extern void add_timer(struct timer_list *timer);
 
+extern int try_to_del_timer_sync(struct timer_list *timer);
+
 #ifdef CONFIG_SMP
-  extern int try_to_del_timer_sync(struct timer_list *timer);
   extern int del_timer_sync(struct timer_list *timer);
 #else
-# define try_to_del_timer_sync(t)      del_timer(t)
 # define del_timer_sync(t)             del_timer(t)
 #endif
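
A hedged sketch of statically defining a deferrable timer with the new initializer; the callback runs normally on a busy CPU but will not pull an idle CPU out of sleep by itself (names illustrative):

static void housekeep_timeout(unsigned long data)
{
        pr_info("deferrable timer fired\n");
}

static struct timer_list housekeep_timer =
        TIMER_DEFERRED_INITIALIZER(housekeep_timeout, 0, 0);

static void arm_housekeeping(void)
{
        mod_timer(&housekeep_timer, jiffies + 10 * HZ);
}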
 
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
new file mode 100644 (file)
index 0000000..d24aaba
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef _LINUX_TIMERQUEUE_H
+#define _LINUX_TIMERQUEUE_H
+
+#include <linux/rbtree.h>
+#include <linux/ktime.h>
+
+
+struct timerqueue_node {
+       struct rb_node node;
+       ktime_t expires;
+};
+
+struct timerqueue_head {
+       struct rb_root head;
+       struct timerqueue_node *next;
+};
+
+
+extern void timerqueue_add(struct timerqueue_head *head,
+                               struct timerqueue_node *node);
+extern void timerqueue_del(struct timerqueue_head *head,
+                               struct timerqueue_node *node);
+extern struct timerqueue_node *timerqueue_iterate_next(
+                                               struct timerqueue_node *node);
+
+/**
+ * timerqueue_getnext - Returns the timer with the earliest expiration time
+ *
+ * @head: head of timerqueue
+ *
+ * Returns a pointer to the timer node that has the
+ * earliest expiration time.
+ */
+static inline
+struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+{
+       return head->next;
+}
+
+static inline void timerqueue_init(struct timerqueue_node *node)
+{
+       RB_CLEAR_NODE(&node->node);
+}
+
+static inline void timerqueue_init_head(struct timerqueue_head *head)
+{
+       head->head = RB_ROOT;
+       head->next = NULL;
+}
+#endif /* _LINUX_TIMERQUEUE_H */
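
A minimal usage sketch for the new interface: nodes stay sorted by expiry in the rbtree, and the head caches the earliest node so timerqueue_getnext() is O(1):

#include <linux/timerqueue.h>

static void timerqueue_example(void)
{
        struct timerqueue_head q;
        struct timerqueue_node a, b, *next;

        timerqueue_init_head(&q);
        timerqueue_init(&a);
        timerqueue_init(&b);

        a.expires = ktime_set(10, 0);   /* 10 s */
        b.expires = ktime_set(5, 0);    /*  5 s */
        timerqueue_add(&q, &a);
        timerqueue_add(&q, &b);

        next = timerqueue_getnext(&q);  /* &b, the earliest expiry */
        timerqueue_del(&q, next);
}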
index a4a90b6726ce6129b43174609fb3e35a2bd088ae..d3e4f87e95c0fa67236f92c2a688fdaa640cfaae 100644 (file)
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 
 #define TP_PROTO(args...)      args
 #define TP_ARGS(args...)       args
+#define TP_CONDITION(args...)  args
 
 #ifdef CONFIG_TRACEPOINTS
 
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args)                                    \
+#define __DO_TRACE(tp, proto, args, cond)                              \
        do {                                                            \
                struct tracepoint_func *it_func_ptr;                    \
                void *it_func;                                          \
                void *__data;                                           \
                                                                        \
+               if (!(cond))                                            \
+                       return;                                         \
                rcu_read_lock_sched_notrace();                          \
                it_func_ptr = rcu_dereference_sched((tp)->funcs);       \
                if (it_func_ptr) {                                      \
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(proto)                          \
        {                                                               \
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 do_trace:                                                              \
                        __DO_TRACE(&__tracepoint_##name,                \
                                TP_PROTO(data_proto),                   \
-                               TP_ARGS(data_args));                    \
+                               TP_ARGS(data_args),                     \
+                               TP_CONDITION(cond));                    \
        }                                                               \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
@@ -186,7 +190,7 @@ do_trace:                                                           \
        EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        static inline void trace_##name(proto)                          \
        { }                                                             \
        static inline int                                               \
@@ -227,13 +231,20 @@ do_trace:                                                         \
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)                                     \
-               __DECLARE_TRACE(name, void, , void *__data, __data)
+               __DECLARE_TRACE(name, void, , 1, void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)                               \
-               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),      \
+               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,   \
                                PARAMS(void *__data, proto),            \
                                PARAMS(__data, args))
 
+#define DECLARE_TRACE_CONDITION(name, proto, args, cond)               \
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+                       PARAMS(void *__data, proto),                    \
+                       PARAMS(__data, args))
+
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -347,11 +358,21 @@ do_trace:                                                         \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_CONDITION(template, name, proto,          \
+                              args, cond)                      \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)  \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,              \
                assign, print, reg, unreg)                      \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_CONDITION(name, proto, args, cond,         \
+                             struct, assign, print)            \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
+
+#define TRACE_EVENT_FLAGS(event, flag)
 
 #endif /* ifdef TRACE_EVENT (see note above) */
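
With the condition plumbing above, a tracepoint can bail out before doing any RCU work whenever its condition is false. A hedged sketch of a conditional event definition; the event and field names are illustrative:

TRACE_EVENT_CONDITION(my_event,

        TP_PROTO(int value),

        TP_ARGS(value),

        /* the probe fires only when this evaluates true */
        TP_CONDITION(value != 0),

        TP_STRUCT__entry(
                __field(int, value)
        ),

        TP_fast_assign(
                __entry->value = value;
        ),

        TP_printk("value=%d", __entry->value)
);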
index 2498bb9fe002a4bafee70389e70c57b842ede507..c9a6abd972a142e7aea4129da8a8e5b9bc64b6ce 100644 (file)
@@ -3,9 +3,9 @@
 
 #include <linux/kernel.h>
 
-struct __una_u16 { u16 x __attribute__((packed)); };
-struct __una_u32 { u32 x __attribute__((packed)); };
-struct __una_u64 { u64 x __attribute__((packed)); };
+struct __una_u16 { u16 x; } __attribute__((packed));
+struct __una_u32 { u32 x; } __attribute__((packed));
+struct __una_u64 { u64 x; } __attribute__((packed));
 
 static inline u16 __get_unaligned_cpu16(const void *p)
 {
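
Attaching __attribute__((packed)) to the struct rather than the member is the portable way to drop the struct's alignment to 1 so the compiler emits byte-safe loads; on a lone member the attribute is not reliably honored. A small sketch of an accessor built on these wrappers (helper name illustrative):

/* read a u32, in CPU byte order, from a possibly misaligned offset */
static u32 read_u32_at(const void *buf, size_t off)
{
        return __get_unaligned_cpu32((const char *)buf + off);
}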
index 0c0771f06bfa745e8e4e5add4ec4823cf52eb813..bd257fee60310184b52d0f8c15f1206f6a4f5dad 100644 (file)
@@ -127,12 +127,20 @@ struct execute_work {
        .timer = TIMER_INITIALIZER(NULL, 0, 0),                 \
        }
 
+#define __DEFERRED_WORK_INITIALIZER(n, f) {                    \
+       .work = __WORK_INITIALIZER((n).work, (f)),              \
+       .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0),        \
+       }
+
 #define DECLARE_WORK(n, f)                                     \
        struct work_struct n = __WORK_INITIALIZER(n, f)
 
 #define DECLARE_DELAYED_WORK(n, f)                             \
        struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
 
+#define DECLARE_DEFERRED_WORK(n, f)                            \
+       struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
+
 /*
  * initialize a work item's function pointer
  */
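
DECLARE_DEFERRED_WORK() pairs delayed work with a deferrable backing timer: on a busy CPU the work still runs after the delay, but an idle CPU is not woken just for housekeeping. A hedged sketch with illustrative names:

static void housekeeping_fn(struct work_struct *work)
{
        pr_info("housekeeping ran\n");
}

static DECLARE_DEFERRED_WORK(housekeeping, housekeeping_fn);

static void kick_housekeeping(void)
{
        schedule_delayed_work(&housekeeping, 10 * HZ);
}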
index a1c4d417dfa205e8d5c2cf1d4f9d6bbd7a6ec419..60739c5a23ae3e30e943b55dc45cd53289720db4 100644 (file)
@@ -32,7 +32,4 @@
 #define WM8775_AIN3 4
 #define WM8775_AIN4 8
 
-/* subdev group ID */
-#define WM8775_GID (1 << 0)
-
 #endif
index 0ac3fb5e0973460f3046cdf1f00a0e1114a13dff..bb08692a20b08841ead94acce78c9ead82293d94 100644 (file)
@@ -49,7 +49,6 @@ struct flowi {
        __u8    proto;
        __u8    flags;
 #define FLOWI_FLAG_ANYSRC 0x01
-#define FLOWI_FLAG_MATCH_ANY_IIF 0x02
        union {
                struct {
                        __be16  sport;
index 278312c95f9600bd863fa0dc76c52a7c048a9d19..2ab926860cd855b6ec7861a998d19612624980e6 100644 (file)
@@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb)
        return rt->rt6i_flags & RTF_LOCAL;
 }
 
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+       return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+              skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+}
+
 #endif
 #endif
index 9fdf982d1286a95c02a73bae4131746f05de9343..365359b24177a1a52310679293c43dba7b97533f 100644 (file)
@@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
  *
  * This function may not be called in IRQ context. Calls to this function
  * for a single hardware must be synchronized against each other. Calls
- * to this function and ieee80211_tx_status_irqsafe() may not be mixed
- * for a single hardware.
+ * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe()
+ * may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
@@ -2033,14 +2033,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
 void ieee80211_tx_status(struct ieee80211_hw *hw,
                         struct sk_buff *skb);
 
+/**
+ * ieee80211_tx_status_ni - transmit status callback (in process context)
+ *
+ * Like ieee80211_tx_status() but can be called in process context.
+ *
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_irqsafe() may not be mixed
+ * for a single hardware.
+ *
+ * @hw: the hardware the frame was transmitted by
+ * @skb: the frame that was transmitted, owned by mac80211 after this call
+ */
+static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
+                                         struct sk_buff *skb)
+{
+       local_bh_disable();
+       ieee80211_tx_status(hw, skb);
+       local_bh_enable();
+}
+
 /**
  * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
  *
  * Like ieee80211_tx_status() but can be called in IRQ context
  * (internally defers to a tasklet.)
  *
- * Calls to this function and ieee80211_tx_status() may not be mixed for a
- * single hardware.
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_ni() may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
index dd3031aed9d52e7c3bf5d2c6ad464dbdc23477c0..9fcc680ab6b9d1f4de4d15583275efe7acf66975 100644 (file)
@@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 static inline int tcf_valid_offset(const struct sk_buff *skb,
                                   const unsigned char *ptr, const int len)
 {
-       return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
+       return likely((ptr + len) <= skb_tail_pointer(skb) &&
+                     ptr >= skb->head &&
+                     (ptr <= (ptr + len)));
 }
 
 #ifdef CONFIG_NET_CLS_IND
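
The rewritten tcf_valid_offset() requires the whole [ptr, ptr + len) range to sit within the skb's linear data (the old version used strict inequalities and, being wrapped in unlikely() rather than likely(), had its branch hint inverted) and adds 'ptr <= ptr + len' to reject pointer wraparound for huge len values. The generic shape of the fixed test, as a hedged standalone helper:

static bool range_in_buf(const u8 *start, const u8 *end,
                         const u8 *ptr, size_t len)
{
        return ptr >= start && ptr + len <= end &&
               ptr <= ptr + len;       /* reject wraparound */
}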
index ea1f8a83160df419e986d51ad864a14a215cbea3..79f34e2b752f61479a4b8aae9bc29551cff3b7d0 100644 (file)
@@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
 {
        struct sk_buff *n;
 
-       if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
-           !skb_shared(skb))
-               n = skb_get(skb);
-       else
-               n = skb_clone(skb, gfp_mask);
+       n = skb_clone(skb, gfp_mask);
 
        if (n) {
                n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
index 659d968d95c54afe6df51c3289a0d2d6fb5b34ed..7d3f7ce239b5b831c66b20b7df61e593384604b2 100644 (file)
@@ -754,6 +754,7 @@ struct proto {
        void                    (*unhash)(struct sock *sk);
        void                    (*rehash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);
+       void                    (*clear_sk)(struct sock *sk, int size);
 
        /* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
        sk->sk_prot->hash(sk);
 }
 
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
+
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
index 1dfab54015113b83bce9f3302470c3a5ed95b5e7..b0b4eb24d592fb1f8ecba11294c10e802ff7cd2b 100644 (file)
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
        DEFINE_TRACE(name)
 
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+       TRACE_EVENT(name,                                               \
+               PARAMS(proto),                                          \
+               PARAMS(args),                                           \
+               PARAMS(tstruct),                                        \
+               PARAMS(assign),                                         \
+               PARAMS(print))
+
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,             \
                assign, print, reg, unreg)                      \
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)       \
        DEFINE_TRACE(name)
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
 #undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
 #undef TRACE_HEADER_MULTI_READ
 #undef DECLARE_TRACE
 
index 286784d69b8f480343244d8046327a5a7d9883d9..1bcc2a8c00e29966aa8e48f1f135a3330364b668 100644 (file)
@@ -7,16 +7,67 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-#ifndef _TRACE_POWER_ENUM_
-#define _TRACE_POWER_ENUM_
-enum {
-       POWER_NONE      = 0,
-       POWER_CSTATE    = 1,    /* C-State */
-       POWER_PSTATE    = 2,    /* Fequency change or DVFS */
-       POWER_SSTATE    = 3,    /* Suspend */
-};
+DECLARE_EVENT_CLASS(cpu,
+
+       TP_PROTO(unsigned int state, unsigned int cpu_id),
+
+       TP_ARGS(state, cpu_id),
+
+       TP_STRUCT__entry(
+               __field(        u32,            state           )
+               __field(        u32,            cpu_id          )
+       ),
+
+       TP_fast_assign(
+               __entry->state = state;
+               __entry->cpu_id = cpu_id;
+       ),
+
+       TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state,
+                 (unsigned long)__entry->cpu_id)
+);
+
+DEFINE_EVENT(cpu, cpu_idle,
+
+       TP_PROTO(unsigned int state, unsigned int cpu_id),
+
+       TP_ARGS(state, cpu_id)
+);
+
+/* This file can get included multiple times; see TRACE_HEADER_MULTI_READ at the top */
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING
+
+#define PWR_EVENT_EXIT -1
 #endif
 
+DEFINE_EVENT(cpu, cpu_frequency,
+
+       TP_PROTO(unsigned int frequency, unsigned int cpu_id),
+
+       TP_ARGS(frequency, cpu_id)
+);
+
+TRACE_EVENT(machine_suspend,
+
+       TP_PROTO(unsigned int state),
+
+       TP_ARGS(state),
+
+       TP_STRUCT__entry(
+               __field(        u32,            state           )
+       ),
+
+       TP_fast_assign(
+               __entry->state = state;
+       ),
+
+       TP_printk("state=%lu", (unsigned long)__entry->state)
+);
+
+/* This code will be removed once the deprecation period expires (2.6.41) */
+#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED
+
 /*
  * The power events are used for cpuidle & suspend (power_start, power_end)
  *  and for cpufreq (power_frequency)
@@ -75,6 +126,36 @@ TRACE_EVENT(power_end,
 
 );
 
+/* Deprecated dummy functions must be protected against multiple declaration */
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+
+enum {
+       POWER_NONE = 0,
+       POWER_CSTATE = 1,
+       POWER_PSTATE = 2,
+};
+#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
+
+#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
+
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+enum {
+       POWER_NONE = 0,
+       POWER_CSTATE = 1,
+       POWER_PSTATE = 2,
+};
+
+/* These dummy declarations have to be ripped out when the deprecated
+   events get removed */
+static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
+static inline void trace_power_end(u64 cpuid) {};
+static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
+#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
+
+#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
+
 /*
  * The clock events are used for clock enable/disable and for
  *  clock rate change
@@ -153,7 +234,6 @@ DEFINE_EVENT(power_domain, power_domain_target,
 
        TP_ARGS(name, state, cpu_id)
 );
-
 #endif /* _TRACE_POWER_H */
 
 /* This part must be outside protection */
index fb726ac7caee4f465033ff5d314d788db929ded5..5a4c04a75b3d369fc9665eca1deee12a2d442d61 100644 (file)
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
+
 TRACE_EVENT_FN(sys_exit,
 
        TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
+
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
 #endif /* _TRACE_EVENTS_SYSCALLS_H */
index a9377c0083ad3ed612547f783647132a8268ef09..e16610c208c954541587684c8af64584b01dbfda 100644 (file)
        TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
                PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(name, value)                                 \
+       __TRACE_EVENT_FLAGS(name, value)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -289,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {    \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               mutex_lock(&event_storage_mutex);                       \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)                                      \
index c9728992a776356e043d21df7b33aa045c2d7904..8dfd094e68753dd919dd50299c82fa7385d6621e 100644 (file)
@@ -393,7 +393,6 @@ config PREEMPT_RCU
 
 config RCU_TRACE
        bool "Enable tracing for RCU"
-       depends on TREE_RCU || TREE_PREEMPT_RCU
        help
          This option provides tracing in RCU which presents stats
          in debugfs for debugging RCU implementation.
@@ -459,6 +458,60 @@ config TREE_RCU_TRACE
          TREE_PREEMPT_RCU implementations, permitting Makefile to
          trivially select kernel/rcutree_trace.c.
 
+config RCU_BOOST
+       bool "Enable RCU priority boosting"
+       depends on RT_MUTEXES && TINY_PREEMPT_RCU
+       default n
+       help
+         This option boosts the priority of preempted RCU readers that
+         block the current preemptible RCU grace period for too long.
+         This option also prevents heavy loads from blocking RCU
+         callback invocation for all flavors of RCU.
+
+         Say Y here if you are working with real-time apps or heavy loads.
+         Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+       int "Real-time priority to boost RCU readers to"
+       range 1 99
+       depends on RCU_BOOST
+       default 1
+       help
+         This option specifies the real-time priority to which preempted
+         RCU readers are to be boosted.  If you are working with CPU-bound
+         real-time applications, you should specify a priority higher than
+         the highest-priority CPU-bound application.
+
+         Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+       int "Milliseconds to delay boosting after RCU grace-period start"
+       range 0 3000
+       depends on RCU_BOOST
+       default 500
+       help
+         This option specifies the time to wait after the beginning of
+         a given grace period before priority-boosting preempted RCU
+         readers blocking that grace period.  Note that any RCU reader
+         blocking an expedited RCU grace period is boosted immediately.
+
+         Accept the default if unsure.
+
+config SRCU_SYNCHRONIZE_DELAY
+       int "Microseconds to delay before waiting for readers"
+       range 0 20
+       default 10
+       help
+         This option controls how long SRCU delays before entering its
+         loop waiting on SRCU readers.  The purpose of this loop is
+         to avoid the unconditional context-switch penalty that would
+         otherwise be incurred if there was an active SRCU reader,
+         in a manner similar to adaptive locking schemes.  This should
+         be set to be a bit longer than the common-case SRCU read-side
+         critical-section overhead.
+
+         Accept the default if unsure.
+
 endmenu # "RCU Subsystem"
 
 config IKCONFIG
@@ -741,6 +794,19 @@ config NET_NS
 
 endif # NAMESPACES
 
+config SCHED_AUTOGROUP
+       bool "Automatic process group scheduling"
+       select EVENTFD
+       select CGROUPS
+       select CGROUP_SCHED
+       select FAIR_GROUP_SCHED
+       help
+         This option optimizes the scheduler for common desktop workloads by
+         automatically creating and populating task groups.  This separation
+         of workloads isolates aggressive CPU burners (like build jobs) from
+         desktop applications.  Task group autogeneration is currently based
+         upon task session.
+
 config MM_OWNER
        bool
 
index 830aaec9c7d5e0cb8df39af760791b5335772768..2b54bef33b55c65f8fda4ebf3129db3453c8f53d 100644 (file)
@@ -93,7 +93,7 @@ no_match:
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t __init devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(char *uuid_str)
 {
        dev_t res = 0;
        struct device *dev = NULL;
index 8646401f7a0e4b77579aa13f8de6ac191787be73..ea51770c01701e312f70f9a81357babbf93fb865 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
                                "enabled *very* early, fixing it\n");
                local_irq_disable();
        }
+       idr_init_cache();
+       perf_event_init();
        rcu_init();
        radix_tree_init();
        /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
        enable_debug_pagealloc();
        kmemleak_init();
        debug_objects_mem_init();
-       idr_init_cache();
        setup_per_cpu_pageset();
        numa_policy_init();
        if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
+       lockup_detector_init();
 
        smp_init();
        sched_init_smp();
index f6e726f184916029e2d1cfdbcd4acb2b26f14e69..156cc555614089345553a6e7710580c4f069be0e 100644 (file)
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
-       struct task_struct *caller;
        unsigned long mod;
        void *hcpu;
 };
@@ -198,7 +197,6 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
        struct take_cpu_down_param *param = _param;
-       unsigned int cpu = (unsigned long)param->hcpu;
        int err;
 
        /* Ensure this CPU doesn't handle any more interrupts. */
@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param)
 
        cpu_notify(CPU_DYING | param->mod, param->hcpu);
 
-       if (task_cpu(param->caller) == cpu)
-               move_task_off_dead_cpu(cpu, param->caller);
-       /* Force idle task to run as soon as we yield: it should
-          immediately notice cpu is offline and die quickly. */
-       sched_idle_next();
        return 0;
 }
 
@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
        struct take_cpu_down_param tcd_param = {
-               .caller = current,
                .mod = mod,
                .hcpu = hcpu,
        };
@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        }
        BUG_ON(cpu_online(cpu));
 
-       /* Wait for it to sleep (leaving idle task). */
+       /*
+        * The migration_call() CPU_DYING callback will have removed all
+        * runnable tasks from the cpu, there's only the idle task left now
+        * that the migration thread is done doing the stop_machine thing.
+        *
+        * Wait for the stop thread to go away.
+        */
        while (!idle_cpu(cpu))
-               yield();
+               cpu_relax();
 
        /* This actually kills the CPU. */
        __cpu_die(cpu);
@@ -386,6 +384,14 @@ out:
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
 
+void __weak arch_disable_nonboot_cpus_begin(void)
+{
+}
+
+void __weak arch_disable_nonboot_cpus_end(void)
+{
+}
+
 int disable_nonboot_cpus(void)
 {
        int cpu, first_cpu, error = 0;
@@ -397,6 +403,7 @@ int disable_nonboot_cpus(void)
         * with the userspace trying to use the CPU hotplug at the same time
         */
        cpumask_clear(frozen_cpus);
+       arch_disable_nonboot_cpus_begin();
 
        printk("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
@@ -412,6 +419,8 @@ int disable_nonboot_cpus(void)
                }
        }
 
+       arch_disable_nonboot_cpus_end();
+
        if (!error) {
                BUG_ON(num_online_cpus() > 1);
                /* Make sure the CPUs won't be enabled by someone else */
index 5447dc7defa95b8f0e13acb80b45487df7dc2e73..7d164e25b0f0ea42498d748f389824773e2a0c78 100644 (file)
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
 
 static inline void put_signal_struct(struct signal_struct *sig)
 {
-       if (atomic_dec_and_test(&sig->sigcnt))
+       if (atomic_dec_and_test(&sig->sigcnt)) {
+               sched_autogroup_exit(sig);
                free_signal_struct(sig);
+       }
 }
 
 void __put_task_struct(struct task_struct *tsk)
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        posix_cpu_timers_init_group(sig);
 
        tty_audit_fork(sig);
+       sched_autogroup_fork(sig);
 
        sig->oom_adj = current->signal->oom_adj;
        sig->oom_score_adj = current->signal->oom_score_adj;
@@ -1315,7 +1318,7 @@ bad_fork_cleanup_mm:
        }
 bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
-               free_signal_struct(p->signal);
+               put_signal_struct(p->signal);
 bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
index 40a8777a27d0d85e173f4b7a3efbd4ecff1c9654..3019b92e691744169b3ac50bb3836afae5ab1085 100644 (file)
@@ -68,6 +68,14 @@ int __read_mostly futex_cmpxchg_enabled;
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
+/*
+ * Futex flags used to encode options to functions and preserve them across
+ * restarts.
+ */
+#define FLAGS_SHARED           0x01
+#define FLAGS_CLOCKRT          0x02
+#define FLAGS_HAS_TIMEOUT      0x04
+
 /*
  * Priority Inheritance state:
  */
@@ -123,6 +131,12 @@ struct futex_q {
        u32 bitset;
 };
 
+static const struct futex_q futex_q_init = {
+       /* list gets initialized in queue_me()*/
+       .key = FUTEX_KEY_INIT,
+       .bitset = FUTEX_BITSET_MATCH_ANY
+};
+
 /*
  * Hash buckets are shared by all the futex_keys that hash to the same
  * location.  Each key may have multiple futex_q structures, one for each task
@@ -283,8 +297,7 @@ again:
        return 0;
 }
 
-static inline
-void put_futex_key(int fshared, union futex_key *key)
+static inline void put_futex_key(union futex_key *key)
 {
        drop_futex_key_refs(key);
 }
@@ -870,7 +883,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 /*
  * Wake up waiters matching bitset queued on this futex (uaddr).
  */
-static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
+static int
+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
@@ -881,7 +895,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        if (!bitset)
                return -EINVAL;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -907,7 +921,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        }
 
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 out:
        return ret;
 }
@@ -917,7 +931,7 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
 {
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
@@ -927,10 +941,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
        int ret, op_ret;
 
 retry:
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -962,11 +976,11 @@ retry_private:
                if (ret)
                        goto out_put_keys;
 
-               if (!fshared)
+               if (!(flags & FLAGS_SHARED))
                        goto retry_private;
 
-               put_futex_key(fshared, &key2);
-               put_futex_key(fshared, &key1);
+               put_futex_key(&key2);
+               put_futex_key(&key1);
                goto retry;
        }
 
@@ -996,9 +1010,9 @@ retry_private:
 
        double_unlock_hb(hb1, hb2);
 out_put_keys:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 out_put_key1:
-       put_futex_key(fshared, &key1);
+       put_futex_key(&key1);
 out:
        return ret;
 }
@@ -1133,13 +1147,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 /**
  * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
  * @uaddr1:    source futex user address
- * @fshared:   0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @flags:     futex flags (FLAGS_SHARED, etc.)
  * @uaddr2:    target futex user address
  * @nr_wake:   number of waiters to wake (must be 1 for requeue_pi)
  * @nr_requeue:        number of waiters to requeue (0-INT_MAX)
  * @cmpval:    @uaddr1 expected value (or %NULL)
  * @requeue_pi:        if we are attempting to requeue from a non-pi futex to a
- *             pi futex (pi to pi requeue is not supported)
+ *             pi futex (pi to pi requeue is not supported)
  *
  * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
  * uaddr2 atomically on behalf of the top waiter.
@@ -1148,9 +1162,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  * >=0 - on success, the number of tasks requeued or woken
  *  <0 - on error
  */
-static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-                        int nr_wake, int nr_requeue, u32 *cmpval,
-                        int requeue_pi)
+static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+                        u32 __user *uaddr2, int nr_wake, int nr_requeue,
+                        u32 *cmpval, int requeue_pi)
 {
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        int drop_count = 0, task_count = 0, ret;
@@ -1191,10 +1205,10 @@ retry:
                pi_state = NULL;
        }
 
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -1216,11 +1230,11 @@ retry_private:
                        if (ret)
                                goto out_put_keys;
 
-                       if (!fshared)
+                       if (!(flags & FLAGS_SHARED))
                                goto retry_private;
 
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        goto retry;
                }
                if (curval != *cmpval) {
@@ -1260,8 +1274,8 @@ retry_private:
                        break;
                case -EFAULT:
                        double_unlock_hb(hb1, hb2);
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        ret = fault_in_user_writeable(uaddr2);
                        if (!ret)
                                goto retry;
@@ -1269,8 +1283,8 @@ retry_private:
                case -EAGAIN:
                        /* The owner was exiting, try again. */
                        double_unlock_hb(hb1, hb2);
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        cond_resched();
                        goto retry;
                default:
@@ -1352,9 +1366,9 @@ out_unlock:
                drop_futex_key_refs(&key1);
 
 out_put_keys:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 out_put_key1:
-       put_futex_key(fshared, &key1);
+       put_futex_key(&key1);
 out:
        if (pi_state != NULL)
                free_pi_state(pi_state);
@@ -1494,7 +1508,7 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *newowner, int fshared)
+                               struct task_struct *newowner)
 {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
@@ -1587,20 +1601,11 @@ handle_fault:
        goto retry;
 }
 
-/*
- * In case we must use restart_block to restart a futex_wait,
- * we encode in the 'flags' shared capability
- */
-#define FLAGS_SHARED           0x01
-#define FLAGS_CLOCKRT          0x02
-#define FLAGS_HAS_TIMEOUT      0x04
-
 static long futex_wait_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
  * @uaddr:     user address of the futex
- * @fshared:   whether the futex is shared (1) or not (0)
  * @q:         futex_q (contains pi_state and access to the rt_mutex)
  * @locked:    if the attempt to take the rt_mutex succeeded (1) or not (0)
  *
@@ -1613,8 +1618,7 @@ static long futex_wait_restart(struct restart_block *restart);
  *  0 - success, lock not taken
  * <0 - on error (-EFAULT)
  */
-static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
-                      int locked)
+static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
        struct task_struct *owner;
        int ret = 0;
@@ -1625,7 +1629,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
                 * did a lock-steal - fix up the PI-state in that case:
                 */
                if (q->pi_state->owner != current)
-                       ret = fixup_pi_state_owner(uaddr, q, current, fshared);
+                       ret = fixup_pi_state_owner(uaddr, q, current);
                goto out;
        }
 
@@ -1652,7 +1656,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
                 * lock. Fix the state up.
                 */
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-               ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+               ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
 
@@ -1715,7 +1719,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  * futex_wait_setup() - Prepare to wait on a futex
  * @uaddr:     the futex userspace address
  * @val:       the expected value
- * @fshared:   whether the futex is shared (1) or not (0)
+ * @flags:     futex flags (FLAGS_SHARED, etc.)
  * @q:         the associated futex_q
  * @hb:                storage for hash_bucket pointer to be returned to caller
  *
@@ -1728,7 +1732,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  *  0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
  */
-static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
                           struct futex_q *q, struct futex_hash_bucket **hb)
 {
        u32 uval;
@@ -1752,8 +1756,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
         * rare, but normal.
         */
 retry:
-       q->key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q->key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1769,10 +1772,10 @@ retry_private:
                if (ret)
                        goto out;
 
-               if (!fshared)
+               if (!(flags & FLAGS_SHARED))
                        goto retry_private;
 
-               put_futex_key(fshared, &q->key);
+               put_futex_key(&q->key);
                goto retry;
        }
 
@@ -1783,32 +1786,29 @@ retry_private:
 
 out:
        if (ret)
-               put_futex_key(fshared, &q->key);
+               put_futex_key(&q->key);
        return ret;
 }
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-                     u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+                     ktime_t *abs_time, u32 bitset)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct restart_block *restart;
        struct futex_hash_bucket *hb;
-       struct futex_q q;
+       struct futex_q q = futex_q_init;
        int ret;
 
        if (!bitset)
                return -EINVAL;
-
-       q.pi_state = NULL;
        q.bitset = bitset;
-       q.rt_waiter = NULL;
-       q.requeue_pi_key = NULL;
 
        if (abs_time) {
                to = &timeout;
 
-               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
+                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
+                                     HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                                             current->timer_slack_ns);
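
Here `struct futex_q q = futex_q_init;` replaces the old field-by-field setup (q.pi_state = NULL; and friends). futex_q_init is a static const designated-initializer constant introduced earlier in this futex.c diff; a standalone sketch of the idiom, on a hypothetical struct:

    #include <stdio.h>

    struct waiter {
            void *pi_state;
            void *rt_waiter;
            int   bitset;
    };

    /* One canonical initial value; unnamed fields default to zero/NULL. */
    static const struct waiter waiter_init = {
            .bitset = -1,
    };

    int main(void)
    {
            struct waiter w = waiter_init;  /* copies the whole template */

            printf("%p %d\n", w.pi_state, w.bitset);  /* (nil) -1 */
            return 0;
    }

A field added to the struct later picks up a well-defined value at every init site automatically, instead of depending on each caller remembering one more assignment.
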
@@ -1819,7 +1819,7 @@ retry:
         * Prepare to wait on uaddr. On success, holds hb lock and increments
         * q.key refs.
         */
-       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
                goto out;
 
@@ -1852,12 +1852,7 @@ retry:
        restart->futex.val = val;
        restart->futex.time = abs_time->tv64;
        restart->futex.bitset = bitset;
-       restart->futex.flags = FLAGS_HAS_TIMEOUT;
-
-       if (fshared)
-               restart->futex.flags |= FLAGS_SHARED;
-       if (clockrt)
-               restart->futex.flags |= FLAGS_CLOCKRT;
+       restart->futex.flags = flags;
 
        ret = -ERESTART_RESTARTBLOCK;
 
@@ -1873,7 +1868,6 @@ out:
 static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = restart->futex.uaddr;
-       int fshared = 0;
        ktime_t t, *tp = NULL;
 
        if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
@@ -1881,11 +1875,9 @@ static long futex_wait_restart(struct restart_block *restart)
                tp = &t;
        }
        restart->fn = do_no_restart_syscall;
-       if (restart->futex.flags & FLAGS_SHARED)
-               fshared = 1;
-       return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
-                               restart->futex.bitset,
-                               restart->futex.flags & FLAGS_CLOCKRT);
+
+       return (long)futex_wait(uaddr, restart->futex.flags,
+                               restart->futex.val, tp, restart->futex.bitset);
 }
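
With the whole flags word stored in restart->futex.flags, the restart path replays the original call verbatim instead of re-deriving fshared and clockrt from individual bits. A hedged kernel-style sketch of the general restart-block shape as of 2.6.37; the my_wait* names are hypothetical and the slow path is stubbed:

    #include <linux/thread_info.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    static long my_wait(u32 __user *uaddr, unsigned int flags, u32 val)
    {
            return 0;       /* stub for the real slow path */
    }

    static long my_wait_restart(struct restart_block *restart)
    {
            /* One-shot: the replayed call must re-arm if interrupted again. */
            restart->fn = do_no_restart_syscall;
            return my_wait(restart->futex.uaddr, restart->futex.flags,
                           restart->futex.val);
    }

    static long my_wait_interrupted(u32 __user *uaddr, unsigned int flags,
                                    u32 val)
    {
            struct restart_block *restart =
                    &current_thread_info()->restart_block;

            restart->fn = my_wait_restart;
            restart->futex.uaddr = uaddr;
            restart->futex.flags = flags;   /* stored wholesale */
            restart->futex.val = val;
            return -ERESTART_RESTARTBLOCK;
    }
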
 
 
@@ -1895,12 +1887,12 @@ static long futex_wait_restart(struct restart_block *restart)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, int fshared,
-                        int detect, ktime_t *time, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
+                        ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct futex_hash_bucket *hb;
-       struct futex_q q;
+       struct futex_q q = futex_q_init;
        int res, ret;
 
        if (refill_pi_state_cache())
@@ -1914,12 +1906,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
                hrtimer_set_expires(&to->timer, *time);
        }
 
-       q.pi_state = NULL;
-       q.rt_waiter = NULL;
-       q.requeue_pi_key = NULL;
 retry:
-       q.key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q.key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -1941,7 +1929,7 @@ retry_private:
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
-                       put_futex_key(fshared, &q.key);
+                       put_futex_key(&q.key);
                        cond_resched();
                        goto retry;
                default:
@@ -1971,7 +1959,7 @@ retry_private:
         * Fixup the pi_state owner and possibly acquire the lock if we
         * haven't already.
         */
-       res = fixup_owner(uaddr, fshared, &q, !ret);
+       res = fixup_owner(uaddr, &q, !ret);
        /*
         * If fixup_owner() returned an error, propagate that.  If it acquired
         * the lock, clear our -ETIMEDOUT or -EINTR.
@@ -1995,7 +1983,7 @@ out_unlock_put_key:
        queue_unlock(&q, hb);
 
 out_put_key:
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
 out:
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
@@ -2008,10 +1996,10 @@ uaddr_faulted:
        if (ret)
                goto out_put_key;
 
-       if (!fshared)
+       if (!(flags & FLAGS_SHARED))
                goto retry_private;
 
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
        goto retry;
 }
 
@@ -2020,7 +2008,7 @@ uaddr_faulted:
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr, int fshared)
+static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
@@ -2038,7 +2026,7 @@ retry:
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -2093,14 +2081,14 @@ retry:
 
 out_unlock:
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 
 out:
        return ret;
 
 pi_faulted:
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 
        ret = fault_in_user_writeable(uaddr);
        if (!ret)
@@ -2160,7 +2148,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 /**
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
  * @uaddr:     the futex we initially wait on (non-pi)
- * @fshared:   whether the futexes are shared (1) or not (0).  They must be
+ * @flags:     futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); the futexes
 *             must be the same type, no requeueing from private to shared, etc.
  * @val:       the expected value of uaddr
  * @abs_time:  absolute timeout
@@ -2198,16 +2186,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  *  0 - On success
  * <0 - On error
  */
-static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                                 u32 val, ktime_t *abs_time, u32 bitset,
-                                int clockrt, u32 __user *uaddr2)
+                                u32 __user *uaddr2)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct rt_mutex *pi_mutex = NULL;
        struct futex_hash_bucket *hb;
-       union futex_key key2;
-       struct futex_q q;
+       union futex_key key2 = FUTEX_KEY_INIT;
+       struct futex_q q = futex_q_init;
        int res, ret;
 
        if (!bitset)
@@ -2215,8 +2203,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 
        if (abs_time) {
                to = &timeout;
-               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
+                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
+                                     HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                                             current->timer_slack_ns);
@@ -2229,12 +2218,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        debug_rt_mutex_init_waiter(&rt_waiter);
        rt_waiter.task = NULL;
 
-       key2 = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out;
 
-       q.pi_state = NULL;
        q.bitset = bitset;
        q.rt_waiter = &rt_waiter;
        q.requeue_pi_key = &key2;
@@ -2243,7 +2230,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
         * Prepare to wait on uaddr. On success, increments q.key (key1) ref
         * count.
         */
-       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
                goto out_key2;
 
@@ -2273,8 +2260,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                 */
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
-                       ret = fixup_pi_state_owner(uaddr2, &q, current,
-                                                  fshared);
+                       ret = fixup_pi_state_owner(uaddr2, &q, current);
                        spin_unlock(q.lock_ptr);
                }
        } else {
@@ -2293,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                 * Fixup the pi_state owner and possibly acquire the lock if we
                 * haven't already.
                 */
-               res = fixup_owner(uaddr2, fshared, &q, !ret);
+               res = fixup_owner(uaddr2, &q, !ret);
                /*
                 * If fixup_owner() returned an error, propagate that.  If it
                 * acquired the lock, clear -ETIMEDOUT or -EINTR.
@@ -2324,9 +2310,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        }
 
 out_put_keys:
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
 out_key2:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 
 out:
        if (to) {
@@ -2551,58 +2537,57 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
 {
-       int clockrt, ret = -ENOSYS;
-       int cmd = op & FUTEX_CMD_MASK;
-       int fshared = 0;
+       int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
+       unsigned int flags = 0;
 
        if (!(op & FUTEX_PRIVATE_FLAG))
-               fshared = 1;
+               flags |= FLAGS_SHARED;
 
-       clockrt = op & FUTEX_CLOCK_REALTIME;
-       if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
-               return -ENOSYS;
+       if (op & FUTEX_CLOCK_REALTIME) {
+               flags |= FLAGS_CLOCKRT;
+               if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+                       return -ENOSYS;
+       }
 
        switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAIT_BITSET:
-               ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
+               ret = futex_wait(uaddr, flags, val, timeout, val3);
                break;
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAKE_BITSET:
-               ret = futex_wake(uaddr, fshared, val, val3);
+               ret = futex_wake(uaddr, flags, val, val3);
                break;
        case FUTEX_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
                break;
        case FUTEX_CMP_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
-                                   0);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
                break;
        case FUTEX_WAKE_OP:
-               ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
+               ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
                break;
        case FUTEX_LOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+                       ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
                break;
        case FUTEX_UNLOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_unlock_pi(uaddr, fshared);
+                       ret = futex_unlock_pi(uaddr, flags);
                break;
        case FUTEX_TRYLOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+                       ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
                break;
        case FUTEX_WAIT_REQUEUE_PI:
                val3 = FUTEX_BITSET_MATCH_ANY;
-               ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
-                                           clockrt, uaddr2);
+               ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+                                           uaddr2);
                break;
        case FUTEX_CMP_REQUEUE_PI:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
-                                   1);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
                break;
        default:
                ret = -ENOSYS;
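
Userspace is untouched by all of this: the bits do_futex() now folds into flags have always ridden in the op word. For reference, a minimal waiter exercising both bits, FUTEX_PRIVATE_FLAG (which clears FLAGS_SHARED in-kernel) and FUTEX_CLOCK_REALTIME (which sets FLAGS_CLOCKRT and is only honoured for the *_BITSET and requeue-pi ops); runnable against kernels of this vintage or newer:

    #define _GNU_SOURCE
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    static uint32_t futex_word = 1;

    int main(void)
    {
            struct timespec abs;
            long ret;

            clock_gettime(CLOCK_REALTIME, &abs);
            abs.tv_sec += 1;        /* absolute timeout, one second out */

            ret = syscall(SYS_futex, &futex_word,
                          FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG |
                          FUTEX_CLOCK_REALTIME,
                          1 /* expected value */, &abs,
                          NULL, FUTEX_BITSET_MATCH_ANY);
            if (ret < 0)
                    perror("futex");        /* ETIMEDOUT: nobody woke us */
            return 0;
    }
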
index 72206cf5c6cf854898d889a6a645e44febdd526f..f2429fc3438c4f1c2094e59fe54415dc30e4bb51 100644 (file)
@@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
                struct hrtimer *timer;
+               struct timerqueue_node *next;
 
-               if (!base->first)
+               next = timerqueue_getnext(&base->active);
+               if (!next)
                        continue;
-               timer = rb_entry(base->first, struct hrtimer, node);
+               timer = container_of(next, struct hrtimer, node);
+
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                /*
                 * clock_was_set() has changed base->offset so the
@@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
 static int enqueue_hrtimer(struct hrtimer *timer,
                           struct hrtimer_clock_base *base)
 {
-       struct rb_node **link = &base->active.rb_node;
-       struct rb_node *parent = NULL;
-       struct hrtimer *entry;
-       int leftmost = 1;
-
        debug_activate(timer);
 
-       /*
-        * Find the right place in the rbtree:
-        */
-       while (*link) {
-               parent = *link;
-               entry = rb_entry(parent, struct hrtimer, node);
-               /*
-                * We dont care about collisions. Nodes with
-                * the same expiry time stay together.
-                */
-               if (hrtimer_get_expires_tv64(timer) <
-                               hrtimer_get_expires_tv64(entry)) {
-                       link = &(*link)->rb_left;
-               } else {
-                       link = &(*link)->rb_right;
-                       leftmost = 0;
-               }
-       }
-
-       /*
-        * Insert the timer to the rbtree and check whether it
-        * replaces the first pending timer
-        */
-       if (leftmost)
-               base->first = &timer->node;
+       timerqueue_add(&base->active, &timer->node);
 
-       rb_link_node(&timer->node, parent, link);
-       rb_insert_color(&timer->node, &base->active);
        /*
         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
         * state of a possibly running callback.
         */
        timer->state |= HRTIMER_STATE_ENQUEUED;
 
-       return leftmost;
+       return (&timer->node == base->active.next);
 }
 
 /*
@@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
        if (!(timer->state & HRTIMER_STATE_ENQUEUED))
                goto out;
 
-       /*
-        * Remove the timer from the rbtree and replace the first
-        * entry pointer if necessary.
-        */
-       if (base->first == &timer->node) {
-               base->first = rb_next(&timer->node);
+       if (&timer->node == timerqueue_getnext(&base->active)) {
 #ifdef CONFIG_HIGH_RES_TIMERS
                /* Reprogram the clock event device, if enabled */
                if (reprogram && hrtimer_hres_active()) {
@@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
                }
 #endif
        }
-       rb_erase(&timer->node, &base->active);
+       timerqueue_del(&base->active, &timer->node);
 out:
        timer->state = newstate;
 }
@@ -1128,11 +1095,13 @@ ktime_t hrtimer_get_next_event(void)
        if (!hrtimer_hres_active()) {
                for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
                        struct hrtimer *timer;
+                       struct timerqueue_node *next;
 
-                       if (!base->first)
+                       next = timerqueue_getnext(&base->active);
+                       if (!next)
                                continue;
 
-                       timer = rb_entry(base->first, struct hrtimer, node);
+                       timer = container_of(next, struct hrtimer, node);
                        delta.tv64 = hrtimer_get_expires_tv64(timer);
                        delta = ktime_sub(delta, base->get_time());
                        if (delta.tv64 < mindelta.tv64)
@@ -1162,6 +1131,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
        timer->base = &cpu_base->clock_base[clock_id];
        hrtimer_init_timer_hres(timer);
+       timerqueue_init(&timer->node);
 
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
@@ -1278,14 +1248,14 @@ retry:
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                ktime_t basenow;
-               struct rb_node *node;
+               struct timerqueue_node *node;
 
                basenow = ktime_add(now, base->offset);
 
-               while ((node = base->first)) {
+               while ((node = timerqueue_getnext(&base->active))) {
                        struct hrtimer *timer;
 
-                       timer = rb_entry(node, struct hrtimer, node);
+                       timer = container_of(node, struct hrtimer, node);
 
                        /*
                         * The immediate goal for using the softexpires is
@@ -1441,7 +1411,7 @@ void hrtimer_run_pending(void)
  */
 void hrtimer_run_queues(void)
 {
-       struct rb_node *node;
+       struct timerqueue_node *node;
        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
        struct hrtimer_clock_base *base;
        int index, gettime = 1;
@@ -1451,8 +1421,7 @@ void hrtimer_run_queues(void)
 
        for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
                base = &cpu_base->clock_base[index];
-
-               if (!base->first)
+               if (!timerqueue_getnext(&base->active))
                        continue;
 
                if (gettime) {
@@ -1462,10 +1431,10 @@ void hrtimer_run_queues(void)
 
                raw_spin_lock(&cpu_base->lock);
 
-               while ((node = base->first)) {
+               while ((node = timerqueue_getnext(&base->active))) {
                        struct hrtimer *timer;
 
-                       timer = rb_entry(node, struct hrtimer, node);
+                       timer = container_of(node, struct hrtimer, node);
                        if (base->softirq_time.tv64 <=
                                        hrtimer_get_expires_tv64(timer))
                                break;
@@ -1630,8 +1599,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 
        raw_spin_lock_init(&cpu_base->lock);
 
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                cpu_base->clock_base[i].cpu_base = cpu_base;
+               timerqueue_init_head(&cpu_base->clock_base[i].active);
+       }
 
        hrtimer_init_hres(cpu_base);
 }
@@ -1642,10 +1613,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
                                struct hrtimer_clock_base *new_base)
 {
        struct hrtimer *timer;
-       struct rb_node *node;
+       struct timerqueue_node *node;
 
-       while ((node = rb_first(&old_base->active))) {
-               timer = rb_entry(node, struct hrtimer, node);
+       while ((node = timerqueue_getnext(&old_base->active))) {
+               timer = container_of(node, struct hrtimer, node);
                BUG_ON(hrtimer_callback_running(timer));
                debug_deactivate(timer);
 
index e5325825aeb6e1e4ea0514ee37cfa53412ec4e3c..086adf25a55e3aaecf3eb3172a7569b2c1a209e0 100644 (file)
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
 
        constraints_initialized = 1;
 
-       perf_pmu_register(&perf_breakpoint);
+       perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
        return register_die_notifier(&hw_breakpoint_exceptions_nb);
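
perf_pmu_register() gained name and type parameters in this series (the dynamic-PMU-types work), so hw_breakpoint now claims its fixed PERF_TYPE_BREAKPOINT slot by name. A hedged sketch of registering a PMU against the 2.6.37 struct pmu; all my_* names are hypothetical, the callbacks are inert stubs, and a built-in init path would call my_pmu_init():

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/perf_event.h>

    static int  my_event_init(struct perf_event *event)       { return -ENOENT; }
    static int  my_add(struct perf_event *event, int flags)   { return 0; }
    static void my_del(struct perf_event *event, int flags)   { }
    static void my_start(struct perf_event *event, int flags) { }
    static void my_stop(struct perf_event *event, int flags)  { }
    static void my_read(struct perf_event *event)             { }

    static struct pmu my_pmu = {
            .event_init = my_event_init,
            .add        = my_add,
            .del        = my_del,
            .start      = my_start,
            .stop       = my_stop,
            .read       = my_read,
    };

    static int __init my_pmu_init(void)
    {
            /* -1 asks for a dynamically allocated pmu->type; fixed PMUs
             * such as breakpoints pass their PERF_TYPE_* constant. */
            return perf_pmu_register(&my_pmu, "my_pmu", -1);
    }
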
 
index 5f92acc5f952e0afb0489017c265a943a4a7d464..91a5fa25054e1d14d62339749f3229fae49f3766 100644 (file)
@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
  */
 static int irq_thread(void *data)
 {
-       struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+       static struct sched_param param = {
+               .sched_priority = MAX_USER_RT_PRIO/2,
+       };
        struct irqaction *action = data;
        struct irq_desc *desc = irq_to_desc(action->irq);
        int wake, oneshot = desc->status & IRQ_ONESHOT;
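
Making the sched_param static is a pure stack-footprint fix: sched_setscheduler() only reads through the pointer, and every IRQ thread wants the same priority, so there is no reason to materialise the struct on each thread's kernel stack. The same call from userspace, runnable for reference (SCHED_FIFO needs root, hence the perror):

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            /* Read-only and identical for every caller, so static storage
             * is fine -- the same reasoning as the kernel change above. */
            static const struct sched_param param = { .sched_priority = 50 };

            if (sched_setscheduler(0, SCHED_FIFO, &param) != 0)
                    perror("sched_setscheduler");   /* EPERM without root */
            return 0;
    }
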
index 9737a76e106ff1554ecc2174f0e49a92b5badf45..7663e5df0e6f731f1804201a5e6cdf9b6162dd05 100644 (file)
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
        return p->pre_handler == aggr_pre_handler;
 }
 
+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+       return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+              list_empty(&p->list);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 {
-       memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-       memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+       memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
+       memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
 }
 
 #ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
        }
 }
 
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       arch_remove_optimized_kprobe(op);
+       arch_remove_kprobe(p);
+       kfree(op);
+}
+
 /* Return true(!0) if the kprobe is ready for optimization. */
 static inline int kprobe_optready(struct kprobe *p)
 {
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
        return 0;
 }
 
+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+       if (!kprobe_aggrprobe(p))
+               return kprobe_disabled(p);
+
+       op = container_of(p, struct optimized_kprobe, kp);
+
+       return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       if (kprobe_aggrprobe(p)) {
+               op = container_of(p, struct optimized_kprobe, kp);
+               if (!list_empty(&op->list))
+                       return 1;
+       }
+       return 0;
+}
+
 /*
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
 #define OPTIMIZE_DELAY 5
 
-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
 {
-       struct optimized_kprobe *op, *tmp;
-
-       /* Lock modules while optimizing kprobes */
-       mutex_lock(&module_mutex);
-       mutex_lock(&kprobe_mutex);
-       if (kprobes_all_disarmed || !kprobes_allow_optimization)
-               goto end;
-
-       /*
-        * Wait for quiesence period to ensure all running interrupts
-        * are done. Because optprobe may modify multiple instructions
-        * there is a chance that Nth instruction is interrupted. In that
-        * case, running interrupt can return to 2nd-Nth byte of jump
-        * instruction. This wait is for avoiding it.
-        */
-       synchronize_sched();
+       /* Optimization is never done while kprobes are disarmed */
+       if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+           list_empty(&optimizing_list))
+               return;
 
        /*
         * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
         */
        get_online_cpus();
        mutex_lock(&text_mutex);
-       list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
-               WARN_ON(kprobe_disabled(&op->kp));
-               if (arch_optimize_kprobe(op) < 0)
-                       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-               list_del_init(&op->list);
+       arch_optimize_kprobes(&optimizing_list);
+       mutex_unlock(&text_mutex);
+       put_online_cpus();
+}
+
+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if needed) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       /* Unoptimization must be done even when kprobes are disarmed */
+       if (list_empty(&unoptimizing_list))
+               return;
+
+       /* Ditto to do_optimize_kprobes */
+       get_online_cpus();
+       mutex_lock(&text_mutex);
+       arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+       /* Loop free_list for disarming */
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               /* Disarm probes if marked disabled */
+               if (kprobe_disabled(&op->kp))
+                       arch_disarm_kprobe(&op->kp);
+               if (kprobe_unused(&op->kp)) {
+                       /*
+                        * Remove unused probes from hash list. After waiting
+                        * for synchronization, these probes are reclaimed.
+                        * (reclaiming is done by do_free_cleaned_kprobes.)
+                        */
+                       hlist_del_rcu(&op->kp.hlist);
+               } else
+                       list_del_init(&op->list);
        }
        mutex_unlock(&text_mutex);
        put_online_cpus();
-end:
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               BUG_ON(!kprobe_unused(&op->kp));
+               list_del_init(&op->list);
+               free_aggr_kprobe(&op->kp);
+       }
+}
+
+/* Start the optimizer after OPTIMIZE_DELAY has passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+       if (!delayed_work_pending(&optimizing_work))
+               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+       LIST_HEAD(free_list);
+
+       /* Lock modules while optimizing kprobes */
+       mutex_lock(&module_mutex);
+       mutex_lock(&kprobe_mutex);
+
+       /*
+        * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+        * kprobes before waiting for the quiescence period.
+        */
+       do_unoptimize_kprobes(&free_list);
+
+       /*
+        * Step 2: Wait for a quiescence period to ensure all running
+        * interrupts are done. Because an optprobe may modify multiple
+        * instructions, there is a chance that the Nth instruction is
+        * interrupted. In that case, a running interrupt can return to
+        * the 2nd-Nth byte of the jump instruction. This wait avoids that.
+        */
+       synchronize_sched();
+
+       /* Step 3: Optimize kprobes after quiescence period */
+       do_optimize_kprobes();
+
+       /* Step 4: Free cleaned kprobes after quiescence period */
+       do_free_cleaned_kprobes(&free_list);
+
        mutex_unlock(&kprobe_mutex);
        mutex_unlock(&module_mutex);
+
+       /* Step 5: Kick optimizer again if needed */
+       if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
+               kick_kprobe_optimizer();
+       else
+               /* Wake up all waiters */
+               complete_all(&optimizer_comp);
+}
+
+/* Wait for optimization and unoptimization to complete */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+       if (delayed_work_pending(&optimizing_work))
+               wait_for_completion(&optimizer_comp);
 }
 
 /* Optimize kprobe if p is ready to be optimized */
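
The restructured optimizer is a textbook delayed-work batch processor: callers only kick the work item when it is idle, the worker re-kicks itself while either list still has entries, and flushers park on a completion that fires once both lists drain. The skeleton of that pattern, sketched with hypothetical my_* names and stubbed list handling:

    #include <linux/workqueue.h>
    #include <linux/completion.h>

    #define MY_DELAY 5      /* jiffies; mirrors OPTIMIZE_DELAY */

    static void my_worker_fn(struct work_struct *work);
    static DECLARE_DELAYED_WORK(my_work, my_worker_fn);
    static DECLARE_COMPLETION(my_drained);

    static bool my_have_items(void) { return false; }  /* stub predicate */
    static void my_process_batch(void) { }             /* stub batch step */

    static void my_kick(void)
    {
            if (!delayed_work_pending(&my_work))
                    schedule_delayed_work(&my_work, MY_DELAY);
    }

    static void my_worker_fn(struct work_struct *work)
    {
            my_process_batch();
            if (my_have_items())
                    my_kick();                 /* batch again after a delay */
            else
                    complete_all(&my_drained); /* release any flushers */
    }

    static void my_flush(void)
    {
            if (delayed_work_pending(&my_work))
                    wait_for_completion(&my_drained);
    }

Batching matters here because each optimize/unoptimize pass brackets an expensive synchronize_sched() grace period; coalescing probes into one pass amortises it.
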
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
        /* Check if it is already optimized. */
        if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
                return;
-
        op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
-       list_add(&op->list, &optimizing_list);
-       if (!delayed_work_pending(&optimizing_work))
-               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+       if (!list_empty(&op->list))
+               /* This is under unoptimizing. Just dequeue the probe */
+               list_del_init(&op->list);
+       else {
+               list_add(&op->list, &optimizing_list);
+               kick_kprobe_optimizer();
+       }
+}
+
+/* Shortcut for direct (non-delayed) unoptimization */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+       get_online_cpus();
+       arch_unoptimize_kprobe(op);
+       put_online_cpus();
+       if (kprobe_disabled(&op->kp))
+               arch_disarm_kprobe(&op->kp);
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
 {
        struct optimized_kprobe *op;
 
-       if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
-               op = container_of(p, struct optimized_kprobe, kp);
-               if (!list_empty(&op->list))
-                       /* Dequeue from the optimization queue */
+       if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+               return; /* This is not an optprobe nor optimized */
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       if (!kprobe_optimized(p)) {
+               /* Unoptimized or unoptimizing case */
+               if (force && !list_empty(&op->list)) {
+                       /*
+                        * Only if this kprobe is being unoptimized and force
+                        * is set, unoptimize it immediately. (There is no need
+                        * to unoptimize an already-unoptimized kprobe again.)
+                        */
                        list_del_init(&op->list);
-               else
-                       /* Replace jump with break */
-                       arch_unoptimize_kprobe(op);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+                       force_unoptimize_kprobe(op);
+               }
+               return;
+       }
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       if (!list_empty(&op->list)) {
+               /* Dequeue from the optimization queue */
+               list_del_init(&op->list);
+               return;
+       }
+       /* Optimized kprobe case */
+       if (force)
+               /* Forcibly update the code: this is a special case */
+               force_unoptimize_kprobe(op);
+       else {
+               list_add(&op->list, &unoptimizing_list);
+               kick_kprobe_optimizer();
        }
 }
 
+/* Cancel unoptimizing for reusing */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       struct optimized_kprobe *op;
+
+       BUG_ON(!kprobe_unused(ap));
+       /*
+        * Unused kprobe MUST be on the way of delayed unoptimizing (means
+        * there is still a relative jump) and disabled.
+        */
+       op = container_of(ap, struct optimized_kprobe, kp);
+       if (unlikely(list_empty(&op->list)))
+               printk(KERN_WARNING "Warning: found a stray unused "
+                       "aggrprobe@%p\n", ap->addr);
+       /* Enable the probe again */
+       ap->flags &= ~KPROBE_FLAG_DISABLED;
+       /* Optimize it again (remove from op->list) */
+       BUG_ON(!kprobe_optready(ap));
+       optimize_kprobe(ap);
+}
+
 /* Remove optimized instructions */
 static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
        op = container_of(p, struct optimized_kprobe, kp);
-       if (!list_empty(&op->list)) {
-               /* Dequeue from the optimization queue */
+       if (!list_empty(&op->list))
+               /* Dequeue from the (un)optimization queue */
                list_del_init(&op->list);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-       }
-       /* Don't unoptimize, because the target code will be freed. */
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       /* Don't touch the code, because it is already freed. */
        arch_remove_optimized_kprobe(op);
 }
 
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
        arch_prepare_optimized_kprobe(op);
 }
 
-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
-       struct optimized_kprobe *op;
-
-       op = container_of(p, struct optimized_kprobe, kp);
-       arch_remove_optimized_kprobe(op);
-       kfree(op);
-}
-
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
        op = container_of(ap, struct optimized_kprobe, kp);
        if (!arch_prepared_optinsn(&op->optinsn)) {
                /* If failed to setup optimizing, fallback to kprobe */
-               free_aggr_kprobe(ap);
+               arch_remove_optimized_kprobe(op);
+               kfree(op);
                return;
        }
 
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
                return;
 
        kprobes_allow_optimization = false;
-       printk(KERN_INFO "Kprobes globally unoptimized\n");
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
-       mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!kprobe_disabled(p))
-                               unoptimize_kprobe(p);
+                               unoptimize_kprobe(p, false);
                }
        }
-
-       mutex_unlock(&text_mutex);
-       put_online_cpus();
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
+       /* Wait for unoptimization to complete */
+       wait_for_kprobe_optimizer();
+       printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
 int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_SYSCTL */
 
+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
        /* Check collision with other optimized kprobes */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+       _p = get_optimized_kprobe((unsigned long)p->addr);
+       if (unlikely(_p))
+               /* Fallback to unoptimized kprobe */
+               unoptimize_kprobe(_p, true);
 
        arch_arm_kprobe(p);
        optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
 }
 
-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
-       unoptimize_kprobe(p);   /* Try to unoptimize */
-       arch_disarm_kprobe(p);
+       unoptimize_kprobe(p, false);    /* Try to unoptimize */
 
-       /* If another kprobe was blocked, optimize it. */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               optimize_kprobe(old_p);
+       if (!kprobe_queued(p)) {
+               arch_disarm_kprobe(p);
+               /* If another kprobe was blocked, optimize it. */
+               _p = get_optimized_kprobe((unsigned long)p->addr);
+               if (unlikely(_p) && reopt)
+                       optimize_kprobe(_p);
+       }
+       /* TODO: reoptimize others after unoptimizing this probe */
 }
 
 #else /* !CONFIG_OPTPROBES */
 
 #define optimize_kprobe(p)                     do {} while (0)
-#define unoptimize_kprobe(p)                   do {} while (0)
+#define unoptimize_kprobe(p, f)                        do {} while (0)
 #define kill_optimized_kprobe(p)               do {} while (0)
 #define prepare_optimized_kprobe(p)            do {} while (0)
 #define try_to_optimize_kprobe(p)              do {} while (0)
 #define __arm_kprobe(p)                                arch_arm_kprobe(p)
-#define __disarm_kprobe(p)                     arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o)                  arch_disarm_kprobe(p)
+#define kprobe_disarmed(p)                     kprobe_disabled(p)
+#define wait_for_kprobe_optimizer()            do {} while (0)
+
+/* Without optprobes there should be no unused kprobes to be reused */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+       BUG_ON(kprobe_unused(ap));
+}
 
 static __kprobes void free_aggr_kprobe(struct kprobe *p)
 {
+       arch_remove_kprobe(p);
        kfree(p);
 }
 
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
+       /* Ditto */
        mutex_lock(&text_mutex);
-       __disarm_kprobe(kp);
+       __disarm_kprobe(kp, true);
        mutex_unlock(&text_mutex);
-       put_online_cpus();
 }
 
 /*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
        BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
        if (p->break_handler || p->post_handler)
-               unoptimize_kprobe(ap);  /* Fall back to normal kprobe */
+               unoptimize_kprobe(ap, true);    /* Fall back to normal kprobe */
 
        if (p->break_handler) {
                if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
                                          struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *ap = old_p;
+       struct kprobe *ap = orig_p;
 
-       if (!kprobe_aggrprobe(old_p)) {
-               /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
-               ap = alloc_aggr_kprobe(old_p);
+       if (!kprobe_aggrprobe(orig_p)) {
+               /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+               ap = alloc_aggr_kprobe(orig_p);
                if (!ap)
                        return -ENOMEM;
-               init_aggr_kprobe(ap, old_p);
-       }
+               init_aggr_kprobe(ap, orig_p);
+       } else if (kprobe_unused(ap))
+               /* This probe is going to die. Rescue it */
+               reuse_unused_kprobe(ap);
 
        if (kprobe_gone(ap)) {
                /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
        return add_new_kprobe(ap, p);
 }
 
-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
-       struct kprobe *kp;
-
-       list_for_each_entry_rcu(kp, &p->list, list) {
-               if (!kprobe_disabled(kp))
-                       /*
-                        * There is an active probe on the list.
-                        * We can't disable aggr_kprobe.
-                        */
-                       return 0;
-       }
-       p->flags |= KPROBE_FLAG_DISABLED;
-       return 1;
-}
-
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
        struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = get_kprobe(p->addr);
-       if (unlikely(!old_p))
+       ap = get_kprobe(p->addr);
+       if (unlikely(!ap))
                return NULL;
 
-       if (p != old_p) {
-               list_for_each_entry_rcu(list_p, &old_p->list, list)
+       if (p != ap) {
+               list_for_each_entry_rcu(list_p, &ap->list, list)
                        if (list_p == p)
                        /* kprobe p is a valid probe */
                                goto valid;
                return NULL;
        }
 valid:
-       return old_p;
+       return ap;
 }
 
 /* Return error if the kprobe is being re-registered */
 static inline int check_kprobe_rereg(struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *old_p;
 
        mutex_lock(&kprobe_mutex);
-       old_p = __get_valid_kprobe(p);
-       if (old_p)
+       if (__get_valid_kprobe(p))
                ret = -EINVAL;
        mutex_unlock(&kprobe_mutex);
+
        return ret;
 }
 
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+       struct kprobe *kp;
+
+       list_for_each_entry_rcu(kp, &ap->list, list)
+               if (!kprobe_disabled(kp))
+                       /*
+                        * There is an active probe on the list.
+                        * We can't disable this ap.
+                        */
+                       return 0;
+
+       return 1;
+}
+
+/* Disable one kprobe: must be called with kprobe_mutex held */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+       struct kprobe *orig_p;
+
+       /* Get the original kprobe to return */
+       orig_p = __get_valid_kprobe(p);
+       if (unlikely(orig_p == NULL))
+               return NULL;
+
+       if (!kprobe_disabled(p)) {
+               /* Disable probe if it is a child probe */
+               if (p != orig_p)
+                       p->flags |= KPROBE_FLAG_DISABLED;
+
+               /* Try to disarm and disable this/parent probe */
+               if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+                       disarm_kprobe(orig_p);
+                       orig_p->flags |= KPROBE_FLAG_DISABLED;
+               }
+       }
+
+       return orig_p;
+}
+
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = __get_valid_kprobe(p);
-       if (old_p == NULL)
+       /* Disable kprobe. This will disarm it if needed. */
+       ap = __disable_kprobe(p);
+       if (ap == NULL)
                return -EINVAL;
 
-       if (old_p == p ||
-           (kprobe_aggrprobe(old_p) &&
-            list_is_singular(&old_p->list))) {
+       if (ap == p)
                /*
-                * Only probe on the hash list. Disarm only if kprobes are
-                * enabled and not gone - otherwise, the breakpoint would
-                * already have been removed. We save on flushing icache.
+                * This probe is an independent (and non-optimized) kprobe
+                * (not an aggrprobe). Remove from the hash list.
                 */
-               if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-                       disarm_kprobe(old_p);
-               hlist_del_rcu(&old_p->hlist);
-       } else {
+               goto disarmed;
+
+       /* What follows expects this probe to be an aggrprobe */
+       WARN_ON(!kprobe_aggrprobe(ap));
+
+       if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+               /*
+                * !disarmed can happen if the probe is still under delayed
+                * unoptimizing.
+                */
+               goto disarmed;
+       else {
+               /* If disabling probe has special handlers, update aggrprobe */
                if (p->break_handler && !kprobe_gone(p))
-                       old_p->break_handler = NULL;
+                       ap->break_handler = NULL;
                if (p->post_handler && !kprobe_gone(p)) {
-                       list_for_each_entry_rcu(list_p, &old_p->list, list) {
+                       list_for_each_entry_rcu(list_p, &ap->list, list) {
                                if ((list_p != p) && (list_p->post_handler))
                                        goto noclean;
                        }
-                       old_p->post_handler = NULL;
+                       ap->post_handler = NULL;
                }
 noclean:
+               /*
+                * Remove from the aggrprobe: this path will do nothing in
+                * __unregister_kprobe_bottom().
+                */
                list_del_rcu(&p->list);
-               if (!kprobe_disabled(old_p)) {
-                       try_to_disable_aggr_kprobe(old_p);
-                       if (!kprobes_all_disarmed) {
-                               if (kprobe_disabled(old_p))
-                                       disarm_kprobe(old_p);
-                               else
-                                       /* Try to optimize this probe again */
-                                       optimize_kprobe(old_p);
-                       }
-               }
+               if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+                       /*
+                        * Try to optimize this probe again, because post
+                        * handler may have been changed.
+                        */
+                       optimize_kprobe(ap);
        }
        return 0;
+
+disarmed:
+       BUG_ON(!kprobe_disarmed(ap));
+       hlist_del_rcu(&ap->hlist);
+       return 0;
 }
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *ap;
 
        if (list_empty(&p->list))
+               /* This is an independent kprobe */
                arch_remove_kprobe(p);
        else if (list_is_singular(&p->list)) {
-               /* "p" is the last child of an aggr_kprobe */
-               old_p = list_entry(p->list.next, struct kprobe, list);
+               /* This is the last child of an aggrprobe */
+               ap = list_entry(p->list.next, struct kprobe, list);
                list_del(&p->list);
-               arch_remove_kprobe(old_p);
-               free_aggr_kprobe(old_p);
+               free_aggr_kprobe(ap);
        }
+       /* Otherwise, do nothing. */
 }
 
 int __kprobes register_kprobes(struct kprobe **kps, int num)
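
Unregistration stays two-phase, and the rewrite makes the phases explicit: __unregister_kprobe_top() makes the probe unreachable under kprobe_mutex (via __disable_kprobe(), which also disarms), the caller lets a grace period elapse, and __unregister_kprobe_bottom() frees. A stripped-down sketch of that RCU teardown shape, with hypothetical my_* names:

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct probe {
            struct hlist_node hlist;
            /* ... payload ... */
    };

    /* Phase 1 (under the subsystem mutex): make it unreachable. */
    static void my_unregister_top(struct probe *p)
    {
            hlist_del_rcu(&p->hlist);
    }

    /* Phase 2 (after the grace period): nobody can still be running it. */
    static void my_unregister_bottom(struct probe *p)
    {
            kfree(p);
    }

    static void my_unregister(struct probe *p)
    {
            my_unregister_top(p);
            synchronize_sched();    /* wait out preempt-disabled readers */
            my_unregister_bottom(p);
    }
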
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 int __kprobes disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
-       struct kprobe *p;
 
        mutex_lock(&kprobe_mutex);
 
-       /* Check whether specified probe is valid. */
-       p = __get_valid_kprobe(kp);
-       if (unlikely(p == NULL)) {
+       /* Disable this kprobe */
+       if (__disable_kprobe(kp) == NULL)
                ret = -EINVAL;
-               goto out;
-       }
 
-       /* If the probe is already disabled (or gone), just return */
-       if (kprobe_disabled(kp))
-               goto out;
-
-       kp->flags |= KPROBE_FLAG_DISABLED;
-       if (p != kp)
-               /* When kp != p, p is always enabled. */
-               try_to_disable_aggr_kprobe(p);
-
-       if (!kprobes_all_disarmed && kprobe_disabled(p))
-               disarm_kprobe(p);
-out:
        mutex_unlock(&kprobe_mutex);
        return ret;
 }
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
        mutex_lock(&kprobe_mutex);
 
        /* If kprobes are already disarmed, just return */
-       if (kprobes_all_disarmed)
-               goto already_disabled;
+       if (kprobes_all_disarmed) {
+               mutex_unlock(&kprobe_mutex);
+               return;
+       }
 
        kprobes_all_disarmed = true;
        printk(KERN_INFO "Kprobes globally disabled\n");
 
-       /*
-        * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-        * because disarming may also unoptimize kprobes.
-        */
-       get_online_cpus();
        mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-                               __disarm_kprobe(p);
+                               __disarm_kprobe(p, false);
                }
        }
-
        mutex_unlock(&text_mutex);
-       put_online_cpus();
        mutex_unlock(&kprobe_mutex);
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
-       return;
 
-already_disabled:
-       mutex_unlock(&kprobe_mutex);
-       return;
+       /* Wait for the optimizer to finish disarming all kprobes */
+       wait_for_kprobe_optimizer();
 }
 
 /*
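A minimal usage sketch of the disable/enable pair that now funnels through the single __disable_kprobe() helper; the probed symbol and the surrounding init function are arbitrary illustrations, not part of this change:

    #include <linux/kprobes.h>

    static struct kprobe kp = {
            .symbol_name = "do_fork",       /* example symbol */
    };

    static int __init kp_demo_init(void)
    {
            int ret = register_kprobe(&kp);

            if (ret)
                    return ret;
            disable_kprobe(&kp);    /* stays registered, stops firing */
            enable_kprobe(&kp);     /* re-armed (and possibly re-optimized) */
            return 0;
    }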
index 2dc3786349d1723394c25e512b9b570bcf5a4f7e..5355cfd44a3fd21cd767c13d053410ec338f2ede 100644
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
        wait_for_completion(&create.done);
 
        if (!IS_ERR(create.result)) {
-               struct sched_param param = { .sched_priority = 0 };
+               static struct sched_param param = { .sched_priority = 0 };
                va_list args;
 
                va_start(args, namefmt);
@@ -265,6 +265,17 @@ int kthreadd(void *unused)
        return 0;
 }
 
+void __init_kthread_worker(struct kthread_worker *worker,
+                               const char *name,
+                               struct lock_class_key *key)
+{
+       spin_lock_init(&worker->lock);
+       lockdep_set_class_and_name(&worker->lock, key, name);
+       INIT_LIST_HEAD(&worker->work_list);
+       worker->task = NULL;
+}
+EXPORT_SYMBOL_GPL(__init_kthread_worker);
+
 /**
  * kthread_worker_fn - kthread function to process kthread_worker
  * @worker_ptr: pointer to initialized kthread_worker
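For context, __init_kthread_worker() is the out-of-line backend of the init_kthread_worker() wrapper, which passes a static lock_class_key so every worker gets its own lockdep class. A sketch of typical use, assuming this era's kthread.h helpers (init_kthread_work(), queue_kthread_work(), flush_kthread_worker()):

    #include <linux/kthread.h>

    static struct kthread_worker worker;
    static struct kthread_work work;

    static void my_work_fn(struct kthread_work *w)
    {
            /* runs in the dedicated worker thread */
    }

    static int demo(void)
    {
            struct task_struct *task;

            init_kthread_worker(&worker);   /* wraps __init_kthread_worker() */
            task = kthread_run(kthread_worker_fn, &worker, "demo_worker");
            if (IS_ERR(task))
                    return PTR_ERR(task);

            init_kthread_work(&work, my_work_fn);
            queue_kthread_work(&worker, &work);
            flush_kthread_worker(&worker);
            kthread_stop(task);
            return 0;
    }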
index 59b76c8ce9d7172e8176f355da9719495077a133..1969d2fc4b36328cf48798620506ddcd0ec330d0 100644
@@ -494,7 +494,6 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                namelen += 2;
 
        for (i = 0; i < LOCKSTAT_POINTS; i++) {
-               char sym[KSYM_SYMBOL_LEN];
                char ip[32];
 
                if (class->contention_point[i] == 0)
@@ -503,15 +502,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                if (!i)
                        seq_line(m, '-', 40-namelen, namelen);
 
-               sprint_symbol(sym, class->contention_point[i]);
                snprintf(ip, sizeof(ip), "[<%p>]",
                                (void *)class->contention_point[i]);
-               seq_printf(m, "%40s %14lu %29s %s\n", name,
-                               stats->contention_point[i],
-                               ip, sym);
+               seq_printf(m, "%40s %14lu %29s %pS\n",
+                          name, stats->contention_point[i],
+                          ip, (void *)class->contention_point[i]);
        }
        for (i = 0; i < LOCKSTAT_POINTS; i++) {
-               char sym[KSYM_SYMBOL_LEN];
                char ip[32];
 
                if (class->contending_point[i] == 0)
@@ -520,12 +517,11 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                if (!i)
                        seq_line(m, '-', 40-namelen, namelen);
 
-               sprint_symbol(sym, class->contending_point[i]);
                snprintf(ip, sizeof(ip), "[<%p>]",
                                (void *)class->contending_point[i]);
-               seq_printf(m, "%40s %14lu %29s %s\n", name,
-                               stats->contending_point[i],
-                               ip, sym);
+               seq_printf(m, "%40s %14lu %29s %pS\n",
+                          name, stats->contending_point[i],
+                          ip, (void *)class->contending_point[i]);
        }
        if (i) {
                seq_puts(m, "\n");
index d190664f25ff3fa10dca29f37b483f08ad07eae1..34e00b708fad2c79b260ab3d8d4cc199cece8eca 100644
@@ -56,6 +56,7 @@
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
 #include <linux/jump_label.h>
+#include <linux/pfn.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
 #define ARCH_SHF_SMALL 0
 #endif
 
+/*
+ * Modules' sections will be aligned on page boundaries
+ * to ensure complete separation of code and data, but
+ * only when CONFIG_DEBUG_SET_MODULE_RONX=y
+ */
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+# define debug_align(X) ALIGN(X, PAGE_SIZE)
+#else
+# define debug_align(X) (X)
+#endif
+
+/*
+ * Given BASE and SIZE, this macro calculates the number of pages the
+ * memory region occupies.
+ */
+#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ?                \
+               (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \
+                        PFN_DOWN((unsigned long)BASE) + 1)     \
+               : (0UL))
+
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
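A worked example of the page-count macro above, assuming 4 KiB pages: for BASE = 0x1800 and SIZE = 0x1000 the region's last byte sits at 0x27ff, so the macro yields PFN_DOWN(0x27ff) - PFN_DOWN(0x1800) + 1 = 2 - 1 + 1 = 2 pages, even though SIZE is exactly one page, because the region straddles a page boundary. A SIZE of zero yields 0 rather than underflowing.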
 
@@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod)
        return 0;
 }
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+/*
+ * LKM RO/NX protection: protect module's text/ro-data
+ * from modification and any data from execution.
+ */
+void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
+{
+       unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
+       unsigned long end_pfn = PFN_DOWN((unsigned long)end);
+
+       if (end_pfn > begin_pfn)
+               set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+}
+
+static void set_section_ro_nx(void *base,
+                       unsigned long text_size,
+                       unsigned long ro_size,
+                       unsigned long total_size)
+{
+       /* begin and end PFNs of the current subsection */
+       unsigned long begin_pfn;
+       unsigned long end_pfn;
+
+       /*
+        * Set RO for module text and RO-data:
+        * - Always protect first page.
+        * - Do not protect last partial page.
+        */
+       if (ro_size > 0)
+               set_page_attributes(base, base + ro_size, set_memory_ro);
+
+       /*
+        * Set NX permissions for module data:
+        * - Do not protect first partial page.
+        * - Always protect last page.
+        */
+       if (total_size > text_size) {
+               begin_pfn = PFN_UP((unsigned long)base + text_size);
+               end_pfn = PFN_UP((unsigned long)base + total_size);
+               if (end_pfn > begin_pfn)
+                       set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+       }
+}
+
+/* Setting memory back to RW+NX before releasing it */
+void unset_section_ro_nx(struct module *mod, void *module_region)
+{
+       unsigned long total_pages;
+
+       if (mod->module_core == module_region) {
+               /* Set core as NX+RW */
+               total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
+               set_memory_nx((unsigned long)mod->module_core, total_pages);
+               set_memory_rw((unsigned long)mod->module_core, total_pages);
+
+       } else if (mod->module_init == module_region) {
+               /* Set init as NX+RW */
+               total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
+               set_memory_nx((unsigned long)mod->module_init, total_pages);
+               set_memory_rw((unsigned long)mod->module_init, total_pages);
+       }
+}
+
+/* Iterate through all modules and set each module's text as RW */
+void set_all_modules_text_rw(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry_rcu(mod, &modules, list) {
+               if ((mod->module_core) && (mod->core_text_size)) {
+                       set_page_attributes(mod->module_core,
+                                               mod->module_core + mod->core_text_size,
+                                               set_memory_rw);
+               }
+               if ((mod->module_init) && (mod->init_text_size)) {
+                       set_page_attributes(mod->module_init,
+                                               mod->module_init + mod->init_text_size,
+                                               set_memory_rw);
+               }
+       }
+       mutex_unlock(&module_mutex);
+}
+
+/* Iterate through all modules and set each module's text as RO */
+void set_all_modules_text_ro(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry_rcu(mod, &modules, list) {
+               if ((mod->module_core) && (mod->core_text_size)) {
+                       set_page_attributes(mod->module_core,
+                                               mod->module_core + mod->core_text_size,
+                                               set_memory_ro);
+               }
+               if ((mod->module_init) && (mod->init_text_size)) {
+                       set_page_attributes(mod->module_init,
+                                               mod->module_init + mod->init_text_size,
+                                               set_memory_ro);
+               }
+       }
+       mutex_unlock(&module_mutex);
+}
+#else
+static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
+static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
+#endif
+
 /* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
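The opposite rounding directions in set_section_ro_nx() are deliberate; a worked example (4 KiB pages, page-aligned base, text_size = ro_size = 0x1800, total_size = 0x3000) may help. The RO pass covers PFN_DOWN(base) up to but excluding PFN_DOWN(base + 0x1800), i.e. page 0 only; the NX pass covers PFN_UP(base + 0x1800) up to but excluding PFN_UP(base + 0x3000), i.e. page 2 only. Page 1, which holds both the tail of the RO area and the head of the data, is left writable and executable: each protection backs off from a partial page rather than break the other section sharing it.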
@@ -1566,6 +1696,7 @@ static void free_module(struct module *mod)
        destroy_params(mod->kp, mod->num_kp);
 
        /* This may be NULL, but that's OK */
+       unset_section_ro_nx(mod, mod->module_init);
        module_free(mod, mod->module_init);
        kfree(mod->args);
        percpu_modfree(mod);
@@ -1574,6 +1705,7 @@ static void free_module(struct module *mod)
        lockdep_free_key_range(mod->module_core, mod->core_size);
 
        /* Finally, free the core (containing the module structure) */
+       unset_section_ro_nx(mod, mod->module_core);
        module_free(mod, mod->module_core);
 
 #ifdef CONFIG_MPU
@@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
                        s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
                        DEBUGP("\t%s\n", name);
                }
-               if (m == 0)
+               switch (m) {
+               case 0: /* executable */
+                       mod->core_size = debug_align(mod->core_size);
                        mod->core_text_size = mod->core_size;
+                       break;
+               case 1: /* RO: text and ro-data */
+                       mod->core_size = debug_align(mod->core_size);
+                       mod->core_ro_size = mod->core_size;
+                       break;
+               case 3: /* whole core */
+                       mod->core_size = debug_align(mod->core_size);
+                       break;
+               }
        }
 
        DEBUGP("Init section allocation order:\n");
@@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
                                         | INIT_OFFSET_MASK);
                        DEBUGP("\t%s\n", sname);
                }
-               if (m == 0)
+               switch (m) {
+               case 0: /* executable */
+                       mod->init_size = debug_align(mod->init_size);
                        mod->init_text_size = mod->init_size;
+                       break;
+               case 1: /* RO: text and ro-data */
+                       mod->init_size = debug_align(mod->init_size);
+                       mod->init_ro_size = mod->init_size;
+                       break;
+               case 3: /* whole init */
+                       mod->init_size = debug_align(mod->init_size);
+                       break;
+               }
        }
 }
 
@@ -2722,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        blocking_notifier_call_chain(&module_notify_list,
                        MODULE_STATE_COMING, mod);
 
+       /* Set RO and NX regions for core */
+       set_section_ro_nx(mod->module_core,
+                               mod->core_text_size,
+                               mod->core_ro_size,
+                               mod->core_size);
+
+       /* Set RO and NX regions for init */
+       set_section_ro_nx(mod->module_init,
+                               mod->init_text_size,
+                               mod->init_ro_size,
+                               mod->init_size);
+
        do_mod_ctors(mod);
        /* Start the module */
        if (mod->init != NULL)
@@ -2765,6 +2931,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        mod->symtab = mod->core_symtab;
        mod->strtab = mod->core_strtab;
 #endif
+       unset_section_ro_nx(mod, mod->module_init);
        module_free(mod, mod->module_init);
        mod->module_init = NULL;
        mod->init_size = 0;
index 200407c1502f509ee3f9d8a665bc4d3b78a27f74..a5889fb28ecff33eaf5fae64c9d2a50ca03cb2f7 100644
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
-               cpu_relax();
+               arch_mutex_cpu_relax();
        }
 #endif
        spin_lock_mutex(&lock->wait_lock, flags);
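arch_mutex_cpu_relax() gives an architecture a hook to use a cheaper busy-wait primitive in this spin loop; s390 wants this because its cpu_relax() is comparatively expensive there. Everyone else presumably keeps the old behaviour through a fallback in the companion header change, along these lines:

    /* sketch of the generic fallback in <linux/mutex.h> */
    #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
    #define arch_mutex_cpu_relax()  cpu_relax()
    #endif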
index 2870feee81dd7a046703645c9ec50022d4339f39..11847bf1e8cc254db7f2a2a255511fd36eea4a68 100644
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -21,7 +22,9 @@
 #include <linux/dcache.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
+#include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
        }
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * Only top-level events have the pid namespace they were created in.
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * Only top-level events have the pid namespace they were created in.
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+       int entry = sizeof(u64); /* value */
+       int size = 0;
+       int nr = 1;
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_ID)
+               entry += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_GROUP) {
+               nr += event->group_leader->nr_siblings;
+               size += sizeof(u64);
+       }
+
+       size += entry * nr;
+       event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       perf_event__read_size(event);
+
+       if (sample_type & PERF_SAMPLE_IP)
+               size += sizeof(data->ip);
+
+       if (sample_type & PERF_SAMPLE_ADDR)
+               size += sizeof(data->addr);
+
+       if (sample_type & PERF_SAMPLE_PERIOD)
+               size += sizeof(data->period);
+
+       if (sample_type & PERF_SAMPLE_READ)
+               size += event->read_size;
+
+       event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu_entry);
+
+       event->id_header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-       struct perf_event *group_leader = event->group_leader;
+       struct perf_event *group_leader = event->group_leader, *pos;
 
        /*
         * We can have double attach due to group movement in perf_event_open.
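A worked example of the cached size: for a non-group event with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_ID, entry grows from 8 to 16 bytes (value plus id), size picks up 8 bytes for the enabled time, and nr stays 1, giving read_size = 8 + 16 * 1 = 24 bytes. Precomputing this at attach/detach time is what lets the read and sample paths below drop their per-call perf_event_read_size() recomputation.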
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
 
        list_add_tail(&event->group_entry, &group_leader->sibling_list);
        group_leader->nr_siblings++;
+
+       perf_event__header_size(group_leader);
+
+       list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+               perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
        if (event->group_leader != event) {
                list_del_init(&event->group_entry);
                event->group_leader->nr_siblings--;
-               return;
+               goto out;
        }
 
        if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
                /* Inherit group flags from the previous leader */
                sibling->group_flags = event->group_flags;
        }
+
+out:
+       perf_event__header_size(event->group_leader);
+
+       list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+               perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
        /*
         * not supported on inherited events
         */
-       if (event->attr.inherit)
+       if (event->attr.inherit || !is_sampling_event(event))
                return -EINVAL;
 
        atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
        return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-       int entry = sizeof(u64); /* value */
-       int size = 0;
-       int nr = 1;
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_ID)
-               entry += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_GROUP) {
-               nr += event->group_leader->nr_siblings;
-               size += sizeof(u64);
-       }
-
-       size += entry * nr;
-
-       return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
        if (event->state == PERF_EVENT_STATE_ERROR)
                return 0;
 
-       if (count < perf_event_read_size(event))
+       if (count < event->read_size)
                return -ENOSPC;
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        int ret = 0;
        u64 value;
 
-       if (!event->attr.sample_period)
+       if (!is_sampling_event(event))
                return -EINVAL;
 
        if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
        } while (len);
 }
 
+static void __perf_event_header__init_id(struct perf_event_header *header,
+                                        struct perf_sample_data *data,
+                                        struct perf_event *event)
+{
+       u64 sample_type = event->attr.sample_type;
+
+       data->type = sample_type;
+       header->size += event->id_header_size;
+
+       if (sample_type & PERF_SAMPLE_TID) {
+               /* namespace issues */
+               data->tid_entry.pid = perf_event_pid(event, current);
+               data->tid_entry.tid = perf_event_tid(event, current);
+       }
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               data->time = perf_clock();
+
+       if (sample_type & PERF_SAMPLE_ID)
+               data->id = primary_event_id(event);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               data->stream_id = event->id;
+
+       if (sample_type & PERF_SAMPLE_CPU) {
+               data->cpu_entry.cpu      = raw_smp_processor_id();
+               data->cpu_entry.reserved = 0;
+       }
+}
+
+static void perf_event_header__init_id(struct perf_event_header *header,
+                                      struct perf_sample_data *data,
+                                      struct perf_event *event)
+{
+       if (event->attr.sample_id_all)
+               __perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+                                          struct perf_sample_data *data)
+{
+       u64 sample_type = data->type;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               perf_output_put(handle, data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               perf_output_put(handle, data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               perf_output_put(handle, data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               perf_output_put(handle, data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+                                        struct perf_output_handle *handle,
+                                        struct perf_sample_data *sample)
+{
+       if (event->attr.sample_id_all)
+               __perf_event__output_id_sample(handle, sample);
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
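With attr.sample_id_all set, every record type gains a trailer whose fields appear in the fixed order TID, TIME, ID, STREAM_ID, CPU, selected by the sample_type bits; for example, sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME gives id_header_size = sizeof(tid_entry) + sizeof(time) = 8 + 8 = 16 bytes. __perf_event_header__init_id() and __perf_event__output_id_sample() must agree on that order exactly, so that tools can walk the trailer back from the record tail.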
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
+       struct perf_sample_data sample_data;
        struct {
                struct perf_event_header header;
                u64                      id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
                goto out;
 
        have_lost = local_read(&buffer->lost);
-       if (have_lost)
-               size += sizeof(lost_event);
+       if (have_lost) {
+               lost_event.header.size = sizeof(lost_event);
+               perf_event_header__init_id(&lost_event.header, &sample_data,
+                                          event);
+               size += lost_event.header.size;
+       }
 
        perf_output_get_handle(handle);
 
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
-               lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
                lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
+               perf_event__output_id_sample(event, handle, &sample_data);
        }
 
        return 0;
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
        rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
                                 struct perf_event *event,
                                 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
        u64 sample_type = event->attr.sample_type;
 
-       data->type = sample_type;
-
        header->type = PERF_RECORD_SAMPLE;
-       header->size = sizeof(*header);
+       header->size = sizeof(*header) + event->header_size;
 
        header->misc = 0;
        header->misc |= perf_misc_flags(regs);
 
-       if (sample_type & PERF_SAMPLE_IP) {
-               data->ip = perf_instruction_pointer(regs);
-
-               header->size += sizeof(data->ip);
-       }
-
-       if (sample_type & PERF_SAMPLE_TID) {
-               /* namespace issues */
-               data->tid_entry.pid = perf_event_pid(event, current);
-               data->tid_entry.tid = perf_event_tid(event, current);
-
-               header->size += sizeof(data->tid_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_TIME) {
-               data->time = perf_clock();
-
-               header->size += sizeof(data->time);
-       }
-
-       if (sample_type & PERF_SAMPLE_ADDR)
-               header->size += sizeof(data->addr);
-
-       if (sample_type & PERF_SAMPLE_ID) {
-               data->id = primary_event_id(event);
-
-               header->size += sizeof(data->id);
-       }
-
-       if (sample_type & PERF_SAMPLE_STREAM_ID) {
-               data->stream_id = event->id;
-
-               header->size += sizeof(data->stream_id);
-       }
-
-       if (sample_type & PERF_SAMPLE_CPU) {
-               data->cpu_entry.cpu             = raw_smp_processor_id();
-               data->cpu_entry.reserved        = 0;
-
-               header->size += sizeof(data->cpu_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_PERIOD)
-               header->size += sizeof(data->period);
+       __perf_event_header__init_id(header, data, event);
 
-       if (sample_type & PERF_SAMPLE_READ)
-               header->size += perf_event_read_size(event);
+       if (sample_type & PERF_SAMPLE_IP)
+               data->ip = perf_instruction_pointer(regs);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
                        struct task_struct *task)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct perf_read_event read_event = {
                .header = {
                        .type = PERF_RECORD_READ,
                        .misc = 0,
-                       .size = sizeof(read_event) + perf_event_read_size(event),
+                       .size = sizeof(read_event) + event->read_size,
                },
                .pid = perf_event_pid(event, task),
                .tid = perf_event_tid(event, task),
        };
        int ret;
 
+       perf_event_header__init_id(&read_event.header, &sample, event);
        ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, read_event);
        perf_output_read(&handle, event);
+       perf_event__output_id_sample(event, &handle, &sample);
 
        perf_output_end(&handle);
 }
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
                                     struct perf_task_event *task_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct task_struct *task = task_event->task;
-       int size, ret;
+       int ret, size = task_event->event_id.header.size;
 
-       size  = task_event->event_id.header.size;
-       ret = perf_output_begin(&handle, event, size, 0, 0);
+       perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+       ret = perf_output_begin(&handle, event,
+                               task_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        task_event->event_id.pid = perf_event_pid(event, task);
        task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
 
        perf_output_put(&handle, task_event->event_id);
 
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
                                     struct perf_comm_event *comm_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = comm_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
+
+       perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               comm_event->event_id.header.size, 0, 0);
 
        if (ret)
-               return;
+               goto out;
 
        comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
        comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
        perf_output_put(&handle, comm_event->event_id);
        perf_output_copy(&handle, comm_event->comm,
                                   comm_event->comm_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        comm_event->comm_size = size;
 
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
                                     struct perf_mmap_event *mmap_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = mmap_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
 
+       perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               mmap_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        mmap_event->event_id.pid = perf_event_pid(event, current);
        mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
        perf_output_put(&handle, mmap_event->event_id);
        perf_output_copy(&handle, mmap_event->file_name,
                                   mmap_event->file_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int ret;
 
        struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
        if (enable)
                throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-       ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+       perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event,
+                               throttle_event.header.size, 1, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, throttle_event);
+       perf_event__output_id_sample(event, &handle, &sample);
        perf_output_end(&handle);
 }
 
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
        struct hw_perf_event *hwc = &event->hw;
        int ret = 0;
 
+       /*
+        * Non-sampling counters might still use the PMI to fold short
+        * hardware counters, ignore those.
+        */
+       if (unlikely(!is_sampling_event(event)))
+               return 0;
+
        if (!throttle) {
                hwc->interrupts++;
        } else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
        if (!regs)
                return;
 
-       if (!hwc->sample_period)
+       if (!is_sampling_event(event))
                return;
 
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        struct hw_perf_event *hwc = &event->hw;
        struct hlist_head *head;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                hwc->last_period = hwc->sample_period;
                perf_swevent_set_period(event);
        }
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -ENOENT;
 
-       /*
-        * Raw tracepoint data is a severe data leak, only allow root to
-        * have these.
-        */
-       if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-                       perf_paranoid_tracepoint_raw() &&
-                       !capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
        err = perf_trace_init(event);
        if (err)
                return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
 
 static inline void perf_tp_register(void)
 {
-       perf_pmu_register(&perf_tracepoint);
+       perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
+       s64 period;
+
+       if (!is_sampling_event(event))
+               return;
 
        hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hwc->hrtimer.function = perf_swevent_hrtimer;
-       if (hwc->sample_period) {
-               s64 period = local64_read(&hwc->period_left);
 
-               if (period) {
-                       if (period < 0)
-                               period = 10000;
+       period = local64_read(&hwc->period_left);
+       if (period) {
+               if (period < 0)
+                       period = 10000;
 
-                       local64_set(&hwc->period_left, 0);
-               } else {
-                       period = max_t(u64, 10000, hwc->sample_period);
-               }
-               __hrtimer_start_range_ns(&hwc->hrtimer,
+               local64_set(&hwc->period_left, 0);
+       } else {
+               period = max_t(u64, 10000, hwc->sample_period);
+       }
+       __hrtimer_start_range_ns(&hwc->hrtimer,
                                ns_to_ktime(period), 0,
                                HRTIMER_MODE_REL_PINNED, 0);
-       }
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
                local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
 out:
        mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+       .name           = "event_source",
+       .dev_attrs      = pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+       kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+       int ret = -ENOMEM;
+
+       pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+       if (!pmu->dev)
+               goto out;
+
+       device_initialize(pmu->dev);
+       ret = dev_set_name(pmu->dev, "%s", pmu->name);
+       if (ret)
+               goto free_dev;
+
+       dev_set_drvdata(pmu->dev, pmu);
+       pmu->dev->bus = &pmu_bus;
+       pmu->dev->release = pmu_dev_release;
+       ret = device_add(pmu->dev);
+       if (ret)
+               goto free_dev;
+
+out:
+       return ret;
+
+free_dev:
+       put_device(pmu->dev);
+       goto out;
+}
 
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
        int cpu, ret;
 
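Callers of the new signature choose between a well-known fixed type and a dynamically allocated one; passing type == -1 asks the idr for an id above PERF_TYPE_MAX. A sketch (my_pmu and its elided callbacks are hypothetical):

    /* hypothetical driver-provided PMU; callbacks elided */
    static struct pmu my_pmu = {
            /* .event_init, .add, .del, .start, .stop, .read ... */
    };

    static int __init my_pmu_init(void)
    {
            /*
             * -1 requests a dynamic type id above PERF_TYPE_MAX; core
             * PMUs pass a fixed one instead, e.g. PERF_TYPE_SOFTWARE.
             */
            return perf_pmu_register(&my_pmu, "my_pmu", -1);
    }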
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
        if (!pmu->pmu_disable_count)
                goto unlock;
 
+       pmu->type = -1;
+       if (!name)
+               goto skip_type;
+       pmu->name = name;
+
+       if (type < 0) {
+               int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+               if (!err)
+                       goto free_pdc;
+
+               err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+               if (err) {
+                       ret = err;
+                       goto free_pdc;
+               }
+       }
+       pmu->type = type;
+
+       if (pmu_bus_running) {
+               ret = pmu_dev_alloc(pmu);
+               if (ret)
+                       goto free_idr;
+       }
+
+skip_type:
        pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
        if (pmu->pmu_cpu_context)
                goto got_cpu_context;
 
        pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
        if (!pmu->pmu_cpu_context)
-               goto free_pdc;
+               goto free_dev;
 
        for_each_possible_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
@@ -5245,6 +5446,14 @@ unlock:
 
        return ret;
 
+free_dev:
+       device_del(pmu->dev);
+       put_device(pmu->dev);
+
+free_idr:
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
        free_percpu(pmu->pmu_disable_count);
        goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
        synchronize_rcu();
 
        free_percpu(pmu->pmu_disable_count);
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+       device_del(pmu->dev);
+       put_device(pmu->dev);
        free_pmu_context(pmu);
 }
 
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
        int idx;
 
        idx = srcu_read_lock(&pmus_srcu);
+
+       rcu_read_lock();
+       pmu = idr_find(&pmu_idr, event->attr.type);
+       rcu_read_unlock();
+       if (pmu)
+               goto unlock;
+
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                int ret = pmu->event_init(event);
                if (!ret)
@@ -5737,6 +5957,12 @@ SYSCALL_DEFINE5(perf_event_open,
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(event);
+       perf_event__id_header_size(event);
+
        /*
         * Drop the reference on the group_event after placing the
         * new event on the sibling_list. This ensures destruction
@@ -6089,6 +6315,12 @@ inherit_event(struct perf_event *parent_event,
        child_event->ctx = child_ctx;
        child_event->overflow_handler = parent_event->overflow_handler;
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(child_event);
+       perf_event__id_header_size(child_event);
+
        /*
         * Link it up in the child's context:
         */
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
 static void perf_pmu_rotate_stop(struct pmu *pmu)
 {
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
 static inline void perf_event_exit_cpu(int cpu) { }
 #endif
 
+static int
+perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               perf_event_exit_cpu(cpu);
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Run the perf reboot notifier at the very last possible moment so that
+ * the generic watchdog code runs as long as possible.
+ */
+static struct notifier_block perf_reboot_notifier = {
+       .notifier_call = perf_reboot,
+       .priority = INT_MIN,
+};
+
 static int __cpuinit
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
 {
        int ret;
 
+       idr_init(&pmu_idr);
+
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
-       perf_pmu_register(&perf_swevent);
-       perf_pmu_register(&perf_cpu_clock);
-       perf_pmu_register(&perf_task_clock);
+       perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+       perf_pmu_register(&perf_cpu_clock, NULL, -1);
+       perf_pmu_register(&perf_task_clock, NULL, -1);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+       register_reboot_notifier(&perf_reboot_notifier);
 
        ret = init_hw_breakpoint();
        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+       struct pmu *pmu;
+       int ret;
+
+       mutex_lock(&pmus_lock);
+
+       ret = bus_register(&pmu_bus);
+       if (ret)
+               goto unlock;
+
+       list_for_each_entry(pmu, &pmus, entry) {
+               if (!pmu->name || pmu->type < 0)
+                       continue;
+
+               ret = pmu_dev_alloc(pmu);
+               WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+       }
+       pmu_bus_running = 1;
+       ret = 0;
+
+unlock:
+       mutex_unlock(&pmus_lock);
+
+       return ret;
+}
+device_initcall(perf_event_sysfs_init);
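The upshot for tooling: each named PMU now appears as /sys/bus/event_source/devices/<name>/, and its type attribute holds the value to place in perf_event_attr.type, which perf_init_event() resolves through the idr before falling back to the linear walk of the pmus list. A hypothetical userspace sketch:

    #include <stdio.h>

    /* Read a named PMU's dynamic type id; returns -1 on failure. */
    static int read_pmu_type(const char *name)
    {
            char path[128];
            FILE *f;
            int type = -1;

            snprintf(path, sizeof(path),
                     "/sys/bus/event_source/devices/%s/type", name);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            if (fscanf(f, "%d", &type) != 1)
                    type = -1;
            fclose(f);
            return type;    /* feeds perf_event_attr.type */
    }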
index 9ca4973f736d53b04bf4eea9373ce635cf7098c3..93bd2eb2bc53efe76dd120501b0cbda115b71bfd 100644
@@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer);
 
 static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
 
-static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags);
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
+
+#define lock_timer(tid, flags)                                            \
+({     struct k_itimer *__timr;                                           \
+       __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
+       __timr;                                                            \
+})
 
 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 {
@@ -619,7 +625,7 @@ out:
 * the find to the timer lock.  To avoid a deadlock, the timer id MUST
 * be released without holding the timer lock.
  */
-static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
        struct k_itimer *timr;
        /*
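The lock_timer() wrapper exists for sparse's lock-context checking: __cond_lock(x, c) tells the checker that lock x is acquired exactly when expression c evaluates non-zero, matching a function that returns NULL on failure and returns with it_lock held on success. Under __CHECKER__ the annotation presumably expands along these lines:

    /* sketch of the sparse-only definition in <linux/compiler.h> */
    # define __cond_lock(x, c)      ((c) ? ({ __acquire(x); 1; }) : 0)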
index ecf770509d0d1bb9dce2381716d6e7583ee6c492..031d5e3a61973464eec7e0e72791e423bfb3396f 100644
@@ -22,6 +22,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/suspend.h>
+#include <trace/events/power.h>
 
 #include "power.h"
 
@@ -201,6 +202,7 @@ int suspend_devices_and_enter(suspend_state_t state)
        if (!suspend_ops)
                return -ENOSYS;
 
+       trace_machine_suspend(state);
        if (suspend_ops->begin) {
                error = suspend_ops->begin(state);
                if (error)
@@ -229,6 +231,7 @@ int suspend_devices_and_enter(suspend_state_t state)
  Close:
        if (suspend_ops->end)
                suspend_ops->end();
+       trace_machine_suspend(PWR_EVENT_EXIT);
        return error;
 
  Recover_platform:
index a23315dc4498844c113cecc9792eabd063e1d87b..ab3ffc5b3b64613507134573dbb94af132c4adff 100644
@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending);
 
 void printk_tick(void)
 {
-       if (__get_cpu_var(printk_pending)) {
-               __get_cpu_var(printk_pending) = 0;
+       if (__this_cpu_read(printk_pending)) {
+               __this_cpu_write(printk_pending, 0);
                wake_up_interruptible(&log_wait);
        }
 }
 
 int printk_needs_cpu(int cpu)
 {
-       if (unlikely(cpu_is_offline(cpu)))
+       if (cpu_is_offline(cpu))
                printk_tick();
-       return per_cpu(printk_pending, cpu);
+       return __this_cpu_read(printk_pending);
 }
 
 void wake_up_klogd(void)
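__this_cpu_read()/__this_cpu_write() can compile down to a single segment-prefixed instruction on x86, whereas __get_cpu_var() first materializes this CPU's address and then dereferences it. An equivalent-behaviour sketch of the two idioms:

    DEFINE_PER_CPU(int, pending);

    /* old: form the per-cpu address, then access through it */
    if (__get_cpu_var(pending))
            __get_cpu_var(pending) = 0;

    /* new: one per-cpu load, one per-cpu store */
    if (__this_cpu_read(pending))
            __this_cpu_write(pending, 0);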
index d806735342acb10bc3e3ae787e62ade34f1d5955..0344937247495d69b3ef5255ace0d94b2250fac2 100644
 #include <linux/time.h>
 #include <linux/cpu.h>
 
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
-       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
-       struct rcu_head **curtail;      /* ->next pointer of last CB. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-       .donetail       = &rcu_sched_ctrlblk.rcucblist,
-       .curtail        = &rcu_sched_ctrlblk.rcucblist,
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-       .donetail       = &rcu_bh_ctrlblk.rcucblist,
-       .curtail        = &rcu_bh_ctrlblk.rcucblist,
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_kthread_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
+static unsigned long have_rcu_kthread_work;
+static void invoke_rcu_kthread(void);
 
 /* Forward declarations for rcutiny_plugin.h. */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+struct rcu_ctrlblk;
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static int rcu_kthread(void *arg);
 static void __call_rcu(struct rcu_head *head,
                       void (*func)(struct rcu_head *rcu),
                       struct rcu_ctrlblk *rcp);
@@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu)
 {
        if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
            rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu)
 void rcu_bh_qs(int cpu)
 {
        if (rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user)
 }
 
 /*
- * Helper function for rcu_process_callbacks() that operates on the
- * specified rcu_ctrlkblk structure.
+ * Invoke the RCU callbacks whose grace period has elapsed on the
+ * specified rcu_ctrlblk structure.
  */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
        struct rcu_head *next, *list;
        unsigned long flags;
+       RCU_TRACE(int cb_count = 0);
 
        /* If no RCU callbacks ready to invoke, just return. */
        if (&rcp->rcucblist == rcp->donetail)
@@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
                next = list->next;
                prefetch(next);
                debug_rcu_head_unqueue(list);
+               local_bh_disable();
                list->func(list);
+               local_bh_enable();
                list = next;
+               RCU_TRACE(cb_count++);
        }
+       RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
 }
 
 /*
- * Invoke any callbacks whose grace period has completed.
+ * This kthread invokes RCU callbacks whose grace periods have
+ * elapsed.  It is awakened as needed, and takes the place of the
+ * RCU_SOFTIRQ that was used previously for this purpose.
+ * This is a kthread, but it is never stopped, at least not until
+ * the system goes down.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static int rcu_kthread(void *arg)
 {
-       __rcu_process_callbacks(&rcu_sched_ctrlblk);
-       __rcu_process_callbacks(&rcu_bh_ctrlblk);
-       rcu_preempt_process_callbacks();
+       unsigned long work;
+       unsigned long morework;
+       unsigned long flags;
+
+       for (;;) {
+               wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+               morework = rcu_boost();
+               local_irq_save(flags);
+               work = have_rcu_kthread_work;
+               have_rcu_kthread_work = morework;
+               local_irq_restore(flags);
+               if (work) {
+                       rcu_process_callbacks(&rcu_sched_ctrlblk);
+                       rcu_process_callbacks(&rcu_bh_ctrlblk);
+                       rcu_preempt_process_callbacks();
+               }
+               schedule_timeout_interruptible(1); /* Leave CPU for others. */
+       }
+
+       return 0;  /* Not reached, but needed to shut gcc up. */
+}
+
+/*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_kthread(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       have_rcu_kthread_work = 1;
+       wake_up(&rcu_kthread_wq);
+       local_irq_restore(flags);
 }
 
 /*
@@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head,
        local_irq_save(flags);
        *rcp->curtail = head;
        rcp->curtail = &head->next;
+       RCU_TRACE(rcp->qlen++);
        local_irq_restore(flags);
 }
 
@@ -282,7 +308,16 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-void __init rcu_init(void)
+/*
+ * Spawn the kthread that invokes RCU callbacks.
+ */
+static int __init rcu_spawn_kthreads(void)
 {
-       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+       struct sched_param sp;
+
+       rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
+       sp.sched_priority = RCU_BOOST_PRIO;
+       sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
+       return 0;
 }
+early_initcall(rcu_spawn_kthreads);
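Two details of the conversion are easy to miss: TINY_RCU is built only for uniprocessor kernels, so the local_irq_save() sections in invoke_rcu_kthread() and around the flag handoff in rcu_kthread() are all the exclusion have_rcu_kthread_work needs (the only concurrency is with interrupts on the same CPU). And callbacks now run in kthread context with only bh disabled around each invocation, rather than in softirq context, so a long callback list no longer monopolizes the CPU.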
index 6ceca4f745ffa1f4535c69467ea59704e2ddbe97..015abaea962ad4087130014506b72dc19b33b43d 100644
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/kthread.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_RCU_TRACE
+#define RCU_TRACE(stmt)        stmt
+#else /* #ifdef CONFIG_RCU_TRACE */
+#define RCU_TRACE(stmt)
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
+       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
+       struct rcu_head **curtail;      /* ->next pointer of last CB. */
+       RCU_TRACE(long qlen);           /* Number of pending CBs. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+       .donetail       = &rcu_sched_ctrlblk.rcucblist,
+       .curtail        = &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+       .donetail       = &rcu_bh_ctrlblk.rcucblist,
+       .curtail        = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk {
        struct list_head *gp_tasks;
                                /* Pointer to the first task blocking the */
                                /*  current grace period, or NULL if there */
-                               /*  is not such task. */
+                               /*  is no such task. */
        struct list_head *exp_tasks;
                                /* Pointer to first task blocking the */
                                /*  current expedited grace period, or NULL */
                                /*  if there is no such task.  If there */
                                /*  is no current expedited grace period, */
                                /*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+       struct list_head *boost_tasks;
+                               /* Pointer to first task that needs to be */
+                               /*  priority-boosted, or NULL if no priority */
+                               /*  boosting is needed.  If there is no */
+                               /*  current or expedited grace period, there */
+                               /*  can be no such task. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
        u8 gpnum;               /* Current grace period. */
        u8 gpcpu;               /* Last grace period blocked by the CPU. */
        u8 completed;           /* Last grace period completed. */
                                /*  If all three are equal, RCU is idle. */
+#ifdef CONFIG_RCU_BOOST
+       s8 boosted_this_gp;     /* Has boosting already happened? */
+       unsigned long boost_time; /* When to start boosting (jiffies) */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#ifdef CONFIG_RCU_TRACE
+       unsigned long n_grace_periods;
+#ifdef CONFIG_RCU_BOOST
+       unsigned long n_tasks_boosted;
+       unsigned long n_exp_boosts;
+       unsigned long n_normal_boosts;
+       unsigned long n_normal_balk_blkd_tasks;
+       unsigned long n_normal_balk_gp_tasks;
+       unsigned long n_normal_balk_boost_tasks;
+       unsigned long n_normal_balk_boosted;
+       unsigned long n_normal_balk_notyet;
+       unsigned long n_normal_balk_nos;
+       unsigned long n_exp_balk_blkd_tasks;
+       unsigned long n_exp_balk_nos;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#endif /* #ifdef CONFIG_RCU_TRACE */
 };
 
 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -121,6 +183,210 @@ static int rcu_preempt_gp_in_progress(void)
        return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
 }
 
+/*
+ * Advance a ->blkd_tasks-list pointer to the next entry, returning
+ * NULL instead if the pointer has reached the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t)
+{
+       struct list_head *np;
+
+       np = t->rcu_node_entry.next;
+       if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+               np = NULL;
+       return np;
+}
+
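
rcu_next_node_entry() above relies on the kernel's circular-list
convention: the list header (->blkd_tasks) doubles as the end-of-list
sentinel, which callers want to see as NULL.  A self-contained sketch of
that convention (illustrative types, not the kernel's list_head):

	#include <stdio.h>

	struct node { struct node *next; };

	/* Map "next is the sentinel" to NULL, as the function above does. */
	static struct node *next_or_null(struct node *e, struct node *sentinel)
	{
		return e->next == sentinel ? NULL : e->next;
	}

	int main(void)
	{
		struct node sentinel, a;

		sentinel.next = &a;  /* circular: sentinel -> a -> sentinel */
		a.next = &sentinel;
		printf("%p\n", (void *)next_or_null(&a, &sentinel)); /* (nil) */
		return 0;
	}
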
+#ifdef CONFIG_RCU_TRACE
+
+#ifdef CONFIG_RCU_BOOST
+static void rcu_initiate_boost_trace(void);
+static void rcu_initiate_exp_boost_trace(void);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Dump additional statistics for TINY_PREEMPT_RCU.
+ */
+static void show_tiny_preempt_stats(struct seq_file *m)
+{
+       seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
+                  rcu_preempt_ctrlblk.rcb.qlen,
+                  rcu_preempt_ctrlblk.n_grace_periods,
+                  rcu_preempt_ctrlblk.gpnum,
+                  rcu_preempt_ctrlblk.gpcpu,
+                  rcu_preempt_ctrlblk.completed,
+                  "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
+                  "N."[!rcu_preempt_ctrlblk.gp_tasks],
+                  "E."[!rcu_preempt_ctrlblk.exp_tasks]);
+#ifdef CONFIG_RCU_BOOST
+       seq_printf(m, "             ttb=%c btg=",
+                  "B."[!rcu_preempt_ctrlblk.boost_tasks]);
+       switch (rcu_preempt_ctrlblk.boosted_this_gp) {
+       case -1:
+               seq_puts(m, "exp");
+               break;
+       case 0:
+               seq_puts(m, "no");
+               break;
+       case 1:
+               seq_puts(m, "begun");
+               break;
+       case 2:
+               seq_puts(m, "done");
+               break;
+       default:
+               seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
+       }
+       seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+                  rcu_preempt_ctrlblk.n_tasks_boosted,
+                  rcu_preempt_ctrlblk.n_exp_boosts,
+                  rcu_preempt_ctrlblk.n_normal_boosts,
+                  (int)(jiffies & 0xffff),
+                  (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
+       seq_printf(m, "             %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
+                  "normal balk",
+                  rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_boosted,
+                  rcu_preempt_ctrlblk.n_normal_balk_notyet,
+                  rcu_preempt_ctrlblk.n_normal_balk_nos);
+       seq_printf(m, "             exp balk: bt=%lu nos=%lu\n",
+                  rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
+                  rcu_preempt_ctrlblk.n_exp_balk_nos);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
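
The tasks=%c%c%c flags above use the terse "T."[expr] idiom: indexing a
two-character string literal with a 0-or-1 expression picks either the
flag letter or a dot.  For example:

	#include <stdio.h>

	int main(void)
	{
		int list_is_empty = 0;

		/* "T."[0] is 'T', "T."[1] is '.'. */
		printf("%c\n", "T."[list_is_empty]);   /* prints 'T' */
		printf("%c\n", "T."[!list_is_empty]);  /* prints '.' */
		return 0;
	}
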
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
+ * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
+ */
+static int rcu_boost(void)
+{
+       unsigned long flags;
+       struct rt_mutex mtx;
+       struct list_head *np;
+       struct task_struct *t;
+
+       if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+               return 0;  /* Nothing to boost. */
+       raw_local_irq_save(flags);
+       rcu_preempt_ctrlblk.boosted_this_gp++;
+       t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
+                        rcu_node_entry);
+       np = rcu_next_node_entry(t);
+       rt_mutex_init_proxy_locked(&mtx, t);
+       t->rcu_boost_mutex = &mtx;
+       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+       raw_local_irq_restore(flags);
+       rt_mutex_lock(&mtx);
+       RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
+       rcu_preempt_ctrlblk.boosted_this_gp++;
+       rt_mutex_unlock(&mtx);
+       return rcu_preempt_ctrlblk.boost_tasks != NULL;
+}
+
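
rcu_boost() works by handing the preempted reader a proxy-locked rt_mutex
and then blocking on that mutex itself, so ordinary priority inheritance
lifts the reader's priority until it reaches rcu_read_unlock().  Pthreads
cannot proxy-lock a mutex on another task's behalf, but the inheritance
half of the mechanism can be sketched with a PTHREAD_PRIO_INHERIT mutex
(a userspace model; all names below are illustrative):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t mtx;
	static pthread_barrier_t ready;

	/* Stand-in for the preempted RCU reader. */
	static void *reader(void *arg)
	{
		pthread_mutex_lock(&mtx);
		pthread_barrier_wait(&ready);  /* booster may now block on mtx */
		/* ...critical section runs boosted while the booster waits... */
		pthread_mutex_unlock(&mtx);    /* reader self-unboosts here */
		return NULL;
	}

	int main(void)
	{
		pthread_mutexattr_t attr;
		pthread_t t;

		pthread_mutexattr_init(&attr);
		/* PI protocol: a blocked waiter donates priority to the owner. */
		pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
		pthread_mutex_init(&mtx, &attr);
		pthread_barrier_init(&ready, NULL, 2);

		pthread_create(&t, NULL, reader, NULL);
		pthread_barrier_wait(&ready);  /* reader now owns the mutex */
		pthread_mutex_lock(&mtx);      /* analogue of rt_mutex_lock() */
		pthread_mutex_unlock(&mtx);
		pthread_join(t, NULL);
		puts("reader released the mutex");
		return 0;
	}
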
+/*
+ * Check to see if it is now time to start boosting RCU readers blocking
+ * the current grace period, and, if so, tell the rcu_kthread_task to
+ * start boosting them.  If there is an expedited boost in progress,
+ * we wait for it to complete.
+ *
+ * If there are no blocked readers blocking the current grace period,
+ * return 0 to let the caller know, otherwise return 1.  Note that this
+ * return value is independent of whether or not boosting was done.
+ */
+static int rcu_initiate_boost(void)
+{
+       if (!rcu_preempt_blocked_readers_cgp()) {
+               RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
+               return 0;
+       }
+       if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+           rcu_preempt_ctrlblk.boost_tasks == NULL &&
+           rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
+           ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
+               rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+               invoke_rcu_kthread();
+               RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
+       } else
+               RCU_TRACE(rcu_initiate_boost_trace());
+       return 1;
+}
+
+/*
+ * Initiate boosting for an expedited grace period.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+       unsigned long flags;
+
+       raw_local_irq_save(flags);
+       if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
+               rcu_preempt_ctrlblk.boost_tasks =
+                       rcu_preempt_ctrlblk.blkd_tasks.next;
+               rcu_preempt_ctrlblk.boosted_this_gp = -1;
+               invoke_rcu_kthread();
+               RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
+       } else
+               RCU_TRACE(rcu_initiate_exp_boost_trace());
+       raw_local_irq_restore(flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
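
RCU_BOOST_DELAY_JIFFIES converts the configured millisecond delay into
jiffies, rounding up so that a short delay never truncates to zero ticks.
A worked example, assuming HZ=250 and CONFIG_RCU_BOOST_DELAY=500:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
	#define HZ 250                     /* assumed tick rate */
	#define CONFIG_RCU_BOOST_DELAY 500 /* assumed delay, milliseconds */

	int main(void)
	{
		/* 500 ms * 250 ticks/s / 1000 ms/s = 125 jiffies. */
		printf("%d jiffies\n",
		       DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000));
		return 0;
	}
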
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+       rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+       if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
+               rcu_preempt_ctrlblk.boosted_this_gp = 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * If there is no RCU priority boosting, we don't boost.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate boosting,
+ * but we do indicate whether there are blocked readers blocking the
+ * current grace period.
+ */
+static int rcu_initiate_boost(void)
+{
+       return rcu_preempt_blocked_readers_cgp();
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate expedited boosting.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, nothing to do at grace-period start.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+}
+
+#endif /* else #ifdef CONFIG_RCU_BOOST */
+
 /*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
@@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void)
        rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 
+       /* If there is no GP then there is nothing more to do.  */
+       if (!rcu_preempt_gp_in_progress())
+               return;
        /*
-        * If there is no GP, or if blocked readers are still blocking GP,
-        * then there is nothing more to do.
+        * Check up on boosting.  If there are no readers blocking the
+        * current grace period, leave.
         */
-       if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
+       if (rcu_initiate_boost())
                return;
 
        /* Advance callbacks. */
@@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void)
        if (!rcu_preempt_blocked_readers_any())
                rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
 
-       /* If there are done callbacks, make RCU_SOFTIRQ process them. */
+       /* If there are done callbacks, cause them to be invoked. */
        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void)
 
                /* Official start of GP. */
                rcu_preempt_ctrlblk.gpnum++;
+               RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
 
                /* Any blocked RCU readers block new GP. */
                if (rcu_preempt_blocked_readers_any())
                        rcu_preempt_ctrlblk.gp_tasks =
                                rcu_preempt_ctrlblk.blkd_tasks.next;
 
+               /* Set up for RCU priority boosting. */
+               rcu_preempt_boost_start_gp();
+
                /* If there is no running reader, CPU is done with GP. */
                if (!rcu_preempt_running_reader())
                        rcu_preempt_cpu_qs();
@@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
                 */
                empty = !rcu_preempt_blocked_readers_cgp();
                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
-               np = t->rcu_node_entry.next;
-               if (np == &rcu_preempt_ctrlblk.blkd_tasks)
-                       np = NULL;
+               np = rcu_next_node_entry(t);
                list_del(&t->rcu_node_entry);
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
                        rcu_preempt_ctrlblk.gp_tasks = np;
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
                        rcu_preempt_ctrlblk.exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+               if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
+                       rcu_preempt_ctrlblk.boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
                INIT_LIST_HEAD(&t->rcu_node_entry);
 
                /*
@@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
                if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
                        rcu_report_exp_done();
        }
+#ifdef CONFIG_RCU_BOOST
+       /* Unboost self if was boosted. */
+       if (special & RCU_READ_UNLOCK_BOOSTED) {
+               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+               rt_mutex_unlock(t->rcu_boost_mutex);
+               t->rcu_boost_mutex = NULL;
+       }
+#endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
 }
 
@@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void)
                rcu_preempt_cpu_qs();
        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
            rcu_preempt_ctrlblk.rcb.donetail)
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
            rcu_preempt_running_reader())
@@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void)
 
 /*
  * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
- * update, so this is invoked from __rcu_process_callbacks() to
+ * update, so this is invoked from rcu_process_callbacks() to
  * handle that case.  Of course, it is invoked for all flavors of
  * RCU, but RCU callbacks can appear only on one of the lists, and
  * neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  */
 static void rcu_preempt_process_callbacks(void)
 {
-       __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+       rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
 }
 
 /*
@@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
        local_irq_save(flags);
        *rcu_preempt_ctrlblk.nexttail = head;
        rcu_preempt_ctrlblk.nexttail = &head->next;
+       RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
        rcu_preempt_start_gp();  /* checks to see if GP needed. */
        local_irq_restore(flags);
 }
@@ -532,6 +816,8 @@ void synchronize_rcu_expedited(void)
 
        /* Wait for tail of ->blkd_tasks list to drain. */
-       if (rcu_preempted_readers_exp())
+       if (rcu_preempted_readers_exp()) {
+               rcu_initiate_expedited_boost();
                wait_event(sync_rcu_preempt_exp_wq,
                           !rcu_preempted_readers_exp());
+       }
 
@@ -572,6 +857,27 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_TRACE
+
+/*
+ * Because preemptible RCU does not exist, it is not necessary to
+ * dump out its statistics.
+ */
+static void show_tiny_preempt_stats(struct seq_file *m)
+{
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * Because preemptible RCU does not exist, it is never necessary to
+ * boost preempted RCU readers.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void)
 #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 #include <linux/kernel_stat.h>
 
 /*
  * During boot, we forgive RCU lockdep issues.  After this function is
  * invoked, we start taking RCU lockdep issues seriously.
  */
-void rcu_scheduler_starting(void)
+void __init rcu_scheduler_starting(void)
 {
        WARN_ON(nr_context_switches() > 0);
        rcu_scheduler_active = 1;
 }
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define RCU_BOOST_PRIO 1
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+#ifdef CONFIG_RCU_TRACE
+
+#ifdef CONFIG_RCU_BOOST
+
+static void rcu_initiate_boost_trace(void)
+{
+       if (rcu_preempt_ctrlblk.gp_tasks == NULL)
+               rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
+       else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
+               rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
+       else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
+               rcu_preempt_ctrlblk.n_normal_balk_boosted++;
+       else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
+               rcu_preempt_ctrlblk.n_normal_balk_notyet++;
+       else
+               rcu_preempt_ctrlblk.n_normal_balk_nos++;
+}
+
+static void rcu_initiate_exp_boost_trace(void)
+{
+       if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
+               rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
+       else
+               rcu_preempt_ctrlblk.n_exp_balk_nos++;
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
+{
+       unsigned long flags;
+
+       raw_local_irq_save(flags);
+       rcp->qlen -= n;
+       raw_local_irq_restore(flags);
+}
+
+/*
+ * Dump statistics for TINY_RCU, such as they are.
+ */
+static int show_tiny_stats(struct seq_file *m, void *unused)
+{
+       show_tiny_preempt_stats(m);
+       seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
+       seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
+       return 0;
+}
+
+static int show_tiny_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_tiny_stats, NULL);
+}
+
+static const struct file_operations show_tiny_stats_fops = {
+       .owner = THIS_MODULE,
+       .open = show_tiny_stats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+
+static int __init rcutiny_trace_init(void)
+{
+       struct dentry *retval;
+
+       rcudir = debugfs_create_dir("rcu", NULL);
+       if (!rcudir)
+               goto free_out;
+       retval = debugfs_create_file("rcudata", 0444, rcudir,
+                                    NULL, &show_tiny_stats_fops);
+       if (!retval)
+               goto free_out;
+       return 0;
+free_out:
+       debugfs_remove_recursive(rcudir);
+       return 1;
+}
+
+static void __exit rcutiny_trace_cleanup(void)
+{
+       debugfs_remove_recursive(rcudir);
+}
+
+module_init(rcutiny_trace_init);
+module_exit(rcutiny_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
+MODULE_LICENSE("GPL");
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
index 9d8e8fb2515f4e4801c214841a7f8c95b8b45ffe..89613f97ff264e35cac497419bd0a4dac798ce5f 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <asm/byteorder.h>
+#include <linux/sched.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
@@ -64,6 +65,9 @@ static int irqreader = 1;     /* RCU readers from irq (timers). */
 static int fqs_duration = 0;   /* Duration of bursts (us), 0 to disable. */
 static int fqs_holdoff = 0;    /* Hold time within burst (us). */
 static int fqs_stutter = 3;    /* Wait time between bursts (s). */
+static int test_boost = 1;     /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
+static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
+static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0444);
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444);
 MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
 module_param(fqs_stutter, int, 0444);
 MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
+module_param(test_boost, int, 0444);
+MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
+module_param(test_boost_interval, int, 0444);
+MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
+module_param(test_boost_duration, int, 0444);
+MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
 module_param(torture_type, charp, 0444);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
@@ -109,6 +119,7 @@ static struct task_struct *stats_task;
 static struct task_struct *shuffler_task;
 static struct task_struct *stutter_task;
 static struct task_struct *fqs_task;
+static struct task_struct *boost_tasks[NR_CPUS];
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail;
 static atomic_t n_rcu_torture_free;
 static atomic_t n_rcu_torture_mberror;
 static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_boost_ktrerror;
+static long n_rcu_torture_boost_rterror;
+static long n_rcu_torture_boost_allocerror;
+static long n_rcu_torture_boost_afferror;
+static long n_rcu_torture_boost_failure;
+static long n_rcu_torture_boosts;
 static long n_rcu_torture_timers;
 static struct list_head rcu_torture_removed;
 static cpumask_var_t shuffle_tmp_mask;
@@ -147,6 +164,16 @@ static int stutter_pause_test;
 #endif
 int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
 
+#ifdef CONFIG_RCU_BOOST
+#define rcu_can_boost() 1
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define rcu_can_boost() 0
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+static unsigned long boost_starttime;  /* jiffies of next boost test start. */
+DEFINE_MUTEX(boost_mutex);             /* protect setting boost_starttime */
+                                       /*  and boost task create/destroy. */
+
 /* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
 
 #define FULLSTOP_DONTSTOP 0    /* Normal operation. */
@@ -277,6 +304,7 @@ struct rcu_torture_ops {
        void (*fqs)(void);
        int (*stats)(char *page);
        int irq_capable;
+       int can_boost;
        char *name;
 };
 
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu"
 };
 
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu_sync"
 };
 
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu_expedited"
 };
 
@@ -683,6 +714,110 @@ static struct rcu_torture_ops sched_expedited_ops = {
        .name           = "sched_expedited"
 };
 
+/*
+ * RCU torture priority-boost testing.  Runs one real-time thread per
+ * CPU for moderate bursts, repeatedly registering RCU callbacks and
+ * spinning waiting for them to be invoked.  If a given callback takes
+ * too long to be invoked, we assume that priority inversion has occurred.
+ */
+
+struct rcu_boost_inflight {
+       struct rcu_head rcu;
+       int inflight;
+};
+
+static void rcu_torture_boost_cb(struct rcu_head *head)
+{
+       struct rcu_boost_inflight *rbip =
+               container_of(head, struct rcu_boost_inflight, rcu);
+
+       smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
+       rbip->inflight = 0;
+}
+
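
The smp_mb() above pairs with the one in rcu_torture_boost() below: the
poster must complete its RCU-related accesses before setting ->inflight,
and the callback must complete the core's accesses before clearing it.
A userspace model of the same publish/consume handshake using C11
release/acquire (a sketch, not the kernel's primitives):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int inflight;
	static int payload;

	/* Poster: write the payload, then announce it. */
	static void post(void)
	{
		payload = 42;
		atomic_store_explicit(&inflight, 1, memory_order_release);
	}

	/* Consumer: observe the announcement, then reading is safe. */
	static void consume(void)
	{
		if (atomic_load_explicit(&inflight, memory_order_acquire))
			printf("payload=%d\n", payload);
		atomic_store_explicit(&inflight, 0, memory_order_release);
	}

	int main(void)
	{
		post();
		consume();
		return 0;
	}
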
+static int rcu_torture_boost(void *arg)
+{
+       unsigned long call_rcu_time;
+       unsigned long endtime;
+       unsigned long oldstarttime;
+       struct rcu_boost_inflight rbi = { .inflight = 0 };
+       struct sched_param sp;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_boost started");
+
+       /* Set real-time priority. */
+       sp.sched_priority = 1;
+       if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
+               n_rcu_torture_boost_rterror++;
+       }
+
+       /* Each pass through the following loop does one boost-test cycle. */
+       do {
+               /* Wait for the next test interval. */
+               oldstarttime = boost_starttime;
+               while (jiffies - oldstarttime > ULONG_MAX / 2) {
+                       schedule_timeout_uninterruptible(1);
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /* Do one boost-test interval. */
+               endtime = oldstarttime + test_boost_duration * HZ;
+               call_rcu_time = jiffies;
+               while (jiffies - endtime > ULONG_MAX / 2) {
+                       /* If we don't have a callback in flight, post one. */
+                       if (!rbi.inflight) {
+                               smp_mb(); /* RCU core before ->inflight = 1. */
+                               rbi.inflight = 1;
+                               call_rcu(&rbi.rcu, rcu_torture_boost_cb);
+                               if (jiffies - call_rcu_time >
+                                        test_boost_duration * HZ - HZ / 2) {
+                                       VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
+                                       n_rcu_torture_boost_failure++;
+                               }
+                               call_rcu_time = jiffies;
+                       }
+                       cond_resched();
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /*
+                * Set the start time of the next test interval.
+                * Yes, this is vulnerable to long delays, but such
+                * delays simply cause a false negative for the next
+                * interval.  Besides, we are running at RT priority,
+                * so delays should be relatively rare.
+                */
+               while (oldstarttime == boost_starttime) {
+                       if (mutex_trylock(&boost_mutex)) {
+                               boost_starttime = jiffies +
+                                                 test_boost_interval * HZ;
+                               n_rcu_torture_boosts++;
+                               mutex_unlock(&boost_mutex);
+                               break;
+                       }
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* Go do the stutter. */
+checkwait:     rcu_stutter_wait("rcu_torture_boost");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+
+       /* Clean up and exit. */
+       VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_boost");
+       while (!kthread_should_stop() || rbi.inflight)
+               schedule_timeout_uninterruptible(1);
+       smp_mb(); /* order accesses to ->inflight before stack-frame death. */
+       return 0;
+}
+
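
The two "jiffies - x > ULONG_MAX / 2" loops above are the wrap-safe way
to ask "is jiffies still before x?": unsigned subtraction gives the right
answer even when the counter rolls over.  A self-contained illustration:

	#include <limits.h>
	#include <stdio.h>

	/* True when a is "before" b on a wrapping unsigned timeline. */
	static int time_is_before(unsigned long a, unsigned long b)
	{
		return a - b > ULONG_MAX / 2;
	}

	int main(void)
	{
		printf("%d\n", time_is_before(5, 10));             /* 1 */
		printf("%d\n", time_is_before(ULONG_MAX - 5, 10)); /* 1: wrap */
		printf("%d\n", time_is_before(10, 5));             /* 0 */
		return 0;
	}
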
 /*
  * RCU torture force-quiescent-state kthread.  Repeatedly induces
  * bursts of calls to force_quiescent_state(), increasing the probability
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page)
        cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
        cnt += sprintf(&page[cnt],
                       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
-                      "rtmbe: %d nt: %ld",
+                      "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
+                      "rtbf: %ld rtb: %ld nt: %ld",
                       rcu_torture_current,
                       rcu_torture_current_version,
                       list_empty(&rcu_torture_freelist),
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page)
                       atomic_read(&n_rcu_torture_alloc_fail),
                       atomic_read(&n_rcu_torture_free),
                       atomic_read(&n_rcu_torture_mberror),
+                      n_rcu_torture_boost_ktrerror,
+                      n_rcu_torture_boost_rterror,
+                      n_rcu_torture_boost_allocerror,
+                      n_rcu_torture_boost_afferror,
+                      n_rcu_torture_boost_failure,
+                      n_rcu_torture_boosts,
                       n_rcu_torture_timers);
-       if (atomic_read(&n_rcu_torture_mberror) != 0)
+       if (atomic_read(&n_rcu_torture_mberror) != 0 ||
+           n_rcu_torture_boost_ktrerror != 0 ||
+           n_rcu_torture_boost_rterror != 0 ||
+           n_rcu_torture_boost_allocerror != 0 ||
+           n_rcu_torture_boost_afferror != 0 ||
+           n_rcu_torture_boost_failure != 0)
                cnt += sprintf(&page[cnt], " !!!");
        cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
        if (i > 1) {
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg)
 }
 
 static inline void
-rcu_torture_print_module_parms(char *tag)
+rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
 {
        printk(KERN_ALERT "%s" TORTURE_FLAG
                "--- %s: nreaders=%d nfakewriters=%d "
                "stat_interval=%d verbose=%d test_no_idle_hz=%d "
                "shuffle_interval=%d stutter=%d irqreader=%d "
-               "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
+               "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
+               "test_boost=%d/%d test_boost_interval=%d "
+               "test_boost_duration=%d\n",
                torture_type, tag, nrealreaders, nfakewriters,
                stat_interval, verbose, test_no_idle_hz, shuffle_interval,
-               stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
+               stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
+               test_boost, cur_ops->can_boost,
+               test_boost_interval, test_boost_duration);
 }
 
-static struct notifier_block rcutorture_nb = {
+static struct notifier_block rcutorture_shutdown_nb = {
        .notifier_call = rcutorture_shutdown_notify,
 };
 
+static void rcutorture_booster_cleanup(int cpu)
+{
+       struct task_struct *t;
+
+       if (boost_tasks[cpu] == NULL)
+               return;
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
+       t = boost_tasks[cpu];
+       boost_tasks[cpu] = NULL;
+       mutex_unlock(&boost_mutex);
+
+       /* This must be outside of the mutex, otherwise deadlock! */
+       kthread_stop(t);
+}
+
+static int rcutorture_booster_init(int cpu)
+{
+       int retval;
+
+       if (boost_tasks[cpu] != NULL)
+               return 0;  /* Already created, nothing more to do. */
+
+       /* Don't allow time recalculation while creating a new task. */
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
+       boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
+                                         "rcu_torture_boost");
+       if (IS_ERR(boost_tasks[cpu])) {
+               retval = PTR_ERR(boost_tasks[cpu]);
+               VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
+               n_rcu_torture_boost_ktrerror++;
+               boost_tasks[cpu] = NULL;
+               mutex_unlock(&boost_mutex);
+               return retval;
+       }
+       kthread_bind(boost_tasks[cpu], cpu);
+       wake_up_process(boost_tasks[cpu]);
+       mutex_unlock(&boost_mutex);
+       return 0;
+}
+
+static int rcutorture_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               (void)rcutorture_booster_init(cpu);
+               break;
+       case CPU_DOWN_PREPARE:
+               rcutorture_booster_cleanup(cpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block rcutorture_cpu_nb = {
+       .notifier_call = rcutorture_cpu_notify,
+};
+
 static void
 rcu_torture_cleanup(void)
 {
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void)
        }
        fullstop = FULLSTOP_RMMOD;
        mutex_unlock(&fullstop_mutex);
-       unregister_reboot_notifier(&rcutorture_nb);
+       unregister_reboot_notifier(&rcutorture_shutdown_nb);
        if (stutter_task) {
                VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
                kthread_stop(stutter_task);
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void)
                kthread_stop(fqs_task);
        }
        fqs_task = NULL;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+               unregister_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i)
+                       rcutorture_booster_cleanup(i);
+       }
 
        /* Wait for all RCU callbacks to fire.  */
 
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void)
        if (cur_ops->cleanup)
                cur_ops->cleanup();
        if (atomic_read(&n_rcu_torture_error))
-               rcu_torture_print_module_parms("End of test: FAILURE");
+               rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
        else
-               rcu_torture_print_module_parms("End of test: SUCCESS");
+               rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
 }
 
 static int __init
@@ -1242,7 +1464,7 @@ rcu_torture_init(void)
                nrealreaders = nreaders;
        else
                nrealreaders = 2 * num_online_cpus();
-       rcu_torture_print_module_parms("Start of test");
+       rcu_torture_print_module_parms(cur_ops, "Start of test");
        fullstop = FULLSTOP_DONTSTOP;
 
        /* Set up the freelist. */
@@ -1263,6 +1485,12 @@ rcu_torture_init(void)
        atomic_set(&n_rcu_torture_free, 0);
        atomic_set(&n_rcu_torture_mberror, 0);
        atomic_set(&n_rcu_torture_error, 0);
+       n_rcu_torture_boost_ktrerror = 0;
+       n_rcu_torture_boost_rterror = 0;
+       n_rcu_torture_boost_allocerror = 0;
+       n_rcu_torture_boost_afferror = 0;
+       n_rcu_torture_boost_failure = 0;
+       n_rcu_torture_boosts = 0;
        for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
                atomic_set(&rcu_torture_wcount[i], 0);
        for_each_possible_cpu(cpu) {
@@ -1376,7 +1604,27 @@ rcu_torture_init(void)
                        goto unwind;
                }
        }
-       register_reboot_notifier(&rcutorture_nb);
+       if (test_boost_interval < 1)
+               test_boost_interval = 1;
+       if (test_boost_duration < 2)
+               test_boost_duration = 2;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+               int retval;
+
+               boost_starttime = jiffies + test_boost_interval * HZ;
+               register_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i) {
+                       if (cpu_is_offline(i))
+                               continue;  /* Heuristic: CPU can go offline. */
+                       retval = rcutorture_booster_init(i);
+                       if (retval < 0) {
+                               firsterr = retval;
+                               goto unwind;
+                       }
+               }
+       }
+       register_reboot_notifier(&rcutorture_shutdown_nb);
        mutex_unlock(&fullstop_mutex);
        return 0;
 
index ccdc04c479815addc8dbacea69643174a4636670..d0ddfea6579d027809cfb0bce885289bac0f957e 100644 (file)
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
        .gpnum = -300, \
        .completed = -300, \
        .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
-       .orphan_cbs_list = NULL, \
-       .orphan_cbs_tail = &structname.orphan_cbs_list, \
-       .orphan_qlen = 0, \
        .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
        .n_force_qs = 0, \
        .n_force_qs_ngp = 0, \
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
        if (rdp->gpnum != rnp->gpnum) {
-               rdp->qs_pending = 1;
-               rdp->passed_quiesc = 0;
+               /*
+                * If the current grace period is waiting for this CPU,
+                * set up to detect a quiescent state, otherwise don't
+                * go looking for one.
+                */
                rdp->gpnum = rnp->gpnum;
+               if (rnp->qsmask & rdp->grpmask) {
+                       rdp->qs_pending = 1;
+                       rdp->passed_quiesc = 0;
+               } else
+                       rdp->qs_pending = 0;
        }
 }
 
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 
                /* Remember that we saw this grace-period completion. */
                rdp->completed = rnp->completed;
+
+               /*
+                * If we were in an extended quiescent state, we may have
+                * missed some grace periods that other CPUs handled on
+                * our behalf. Catch up with this state to avoid noting
+                * spurious new grace periods.  If another grace period
+                * has started, then rnp->gpnum will have advanced, so
+                * we will detect this later on.
+                */
+               if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
+                       rdp->gpnum = rdp->completed;
+
+               /*
+                * If RCU does not need a quiescent state from this CPU,
+                * then make sure that this CPU doesn't go looking for one.
+                */
+               if ((rnp->qsmask & rdp->grpmask) == 0)
+                       rdp->qs_pending = 0;
        }
 }
 
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
- * specified flavor of RCU.  The callbacks will be adopted by the next
- * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
- * comes first.  Because this is invoked from the CPU_DYING notifier,
- * irqs are already disabled.
+ * Move a dying CPU's RCU callbacks to online CPU's callback list.
+ * Synchronization is not required because this function executes
+ * in stop_machine() context.
  */
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
        int i;
+       /* The dying CPU has already been cleared from cpu_online_mask. */
+       int receive_cpu = cpumask_any(cpu_online_mask);
        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+       struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 
        if (rdp->nxtlist == NULL)
                return;  /* irqs disabled, so comparison is stable. */
-       raw_spin_lock(&rsp->onofflock);  /* irqs already disabled. */
-       *rsp->orphan_cbs_tail = rdp->nxtlist;
-       rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+
+       *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+       receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+       receive_rdp->qlen += rdp->qlen;
+       receive_rdp->n_cbs_adopted += rdp->qlen;
+       rdp->n_cbs_orphaned += rdp->qlen;
+
        rdp->nxtlist = NULL;
        for (i = 0; i < RCU_NEXT_SIZE; i++)
                rdp->nxttail[i] = &rdp->nxtlist;
-       rsp->orphan_qlen += rdp->qlen;
-       rdp->n_cbs_orphaned += rdp->qlen;
        rdp->qlen = 0;
-       raw_spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
-}
-
-/*
- * Adopt previously orphaned RCU callbacks.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-       unsigned long flags;
-       struct rcu_data *rdp;
-
-       raw_spin_lock_irqsave(&rsp->onofflock, flags);
-       rdp = this_cpu_ptr(rsp->rda);
-       if (rsp->orphan_cbs_list == NULL) {
-               raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-               return;
-       }
-       *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
-       rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
-       rdp->qlen += rsp->orphan_qlen;
-       rdp->n_cbs_adopted += rsp->orphan_qlen;
-       rsp->orphan_cbs_list = NULL;
-       rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
-       rsp->orphan_qlen = 0;
-       raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
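
Note that rcu_send_cbs_to_online() splices the dying CPU's entire callback
list onto the receiver in O(1) by reusing the tail-pointer representation;
no list walk is needed.  The core move, modeled in userspace (illustrative
types):

	#include <stddef.h>

	struct cb { struct cb *next; };

	struct cblist {
		struct cb *head;
		struct cb **tail;  /* points at the terminating NULL slot */
	};

	/* Append all of src to dst and empty src, in constant time. */
	static void splice(struct cblist *dst, struct cblist *src)
	{
		if (src->head == NULL)
			return;
		*dst->tail = src->head;  /* hook src's chain onto dst's tail */
		dst->tail = src->tail;   /* dst now ends where src ended */
		src->head = NULL;
		src->tail = &src->head;
	}

	int main(void)
	{
		struct cb a = { NULL };
		struct cblist dst = { NULL, &dst.head };
		struct cblist src = { &a, &a.next };

		splice(&dst, &src);
		return dst.head == &a ? 0 : 1;
	}
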
 
 /*
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        if (need_report & RCU_OFL_TASKS_EXP_GP)
                rcu_report_exp_rnp(rsp, rnp);
-
-       rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
-{
-}
-
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
 }
 
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         */
        local_irq_save(flags);
        rdp = this_cpu_ptr(rsp->rda);
-       rcu_process_gp_end(rsp, rdp);
-       check_for_new_grace_period(rsp, rdp);
 
        /* Add the callback to our list. */
        *rdp->nxttail[RCU_NEXT_TAIL] = head;
        rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
-       /* Start a new grace period if one not already started. */
-       if (!rcu_gp_in_progress(rsp)) {
-               unsigned long nestflag;
-               struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-               raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-               rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
-       }
-
        /*
         * Force the grace period if too many callbacks or too long waiting.
         * Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         * is the only one waiting for a grace period to complete.
         */
        if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-               rdp->blimit = LONG_MAX;
-               if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-                   *rdp->nxttail[RCU_DONE_TAIL] != head)
-                       force_quiescent_state(rsp, 0);
-               rdp->n_force_qs_snap = rsp->n_force_qs;
-               rdp->qlen_last_fqs_check = rdp->qlen;
+
+               /* Are we ignoring a completed grace period? */
+               rcu_process_gp_end(rsp, rdp);
+               check_for_new_grace_period(rsp, rdp);
+
+               /* Start a new grace period if one not already started. */
+               if (!rcu_gp_in_progress(rsp)) {
+                       unsigned long nestflag;
+                       struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+                       raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+                       rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
+               } else {
+                       /* Give the grace period a kick. */
+                       rdp->blimit = LONG_MAX;
+                       if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+                           *rdp->nxttail[RCU_DONE_TAIL] != head)
+                               force_quiescent_state(rsp, 0);
+                       rdp->n_force_qs_snap = rsp->n_force_qs;
+                       rdp->qlen_last_fqs_check = rdp->qlen;
+               }
        } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
                force_quiescent_state(rsp, 1);
        local_irq_restore(flags);
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
         * decrement rcu_barrier_cpu_count -- otherwise the first CPU
         * might complete its grace period before all of the other CPUs
         * did their increment, causing this function to return too
-        * early.
+        * early.  Note that on_each_cpu() disables irqs, which prevents
+        * any CPUs from coming online or going offline until each online
+        * CPU has queued its RCU-barrier callback.
         */
        atomic_set(&rcu_barrier_cpu_count, 1);
-       preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
-       rcu_adopt_orphan_cbs(rsp);
        on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-       preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
                complete(&rcu_barrier_completion);
        wait_for_completion(&rcu_barrier_completion);
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
        case CPU_DYING:
        case CPU_DYING_FROZEN:
                /*
-                * preempt_disable() in _rcu_barrier() prevents stop_machine(),
-                * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-                * returns, all online cpus have queued rcu_barrier_func().
-                * The dying CPU clears its cpu_online_mask bit and
-                * moves all of its RCU callbacks to ->orphan_cbs_list
-                * in the context of stop_machine(), so subsequent calls
-                * to _rcu_barrier() will adopt these callbacks and only
-                * then queue rcu_barrier_func() on all remaining CPUs.
+                * The whole machine is "stopped" except this CPU, so we can
+                * touch any data without introducing corruption. We send the
+                * dying CPU's callbacks to an arbitrarily chosen online CPU.
                 */
-               rcu_send_cbs_to_orphanage(&rcu_bh_state);
-               rcu_send_cbs_to_orphanage(&rcu_sched_state);
-               rcu_preempt_send_cbs_to_orphanage();
+               rcu_send_cbs_to_online(&rcu_bh_state);
+               rcu_send_cbs_to_online(&rcu_sched_state);
+               rcu_preempt_send_cbs_to_online();
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
        int i;
 
-       for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+       for (i = NUM_RCU_LVLS - 1; i > 0; i--)
                rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+       rsp->levelspread[0] = RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
index 91d4170c5c13afd2e8997bd59b28e7cc2a4385e8..e8f057e44e3ee00466e840593983ca5062302545 100644 (file)
 /*
  * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
  * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this has not been tested, so there is probably some
- * bug somewhere.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
  */
 #define MAX_RCU_LVLS 4
-#define RCU_FANOUT           (CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_SQ        (RCU_FANOUT * RCU_FANOUT)
-#define RCU_FANOUT_CUBE              (RCU_FANOUT_SQ * RCU_FANOUT)
-#define RCU_FANOUT_FOURTH     (RCU_FANOUT_CUBE * RCU_FANOUT)
-
-#if NR_CPUS <= RCU_FANOUT
+#if CONFIG_RCU_FANOUT > 16
+#define RCU_FANOUT_LEAF       16
+#else /* #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_LEAF       (CONFIG_RCU_FANOUT)
+#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_1         (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
 #  define NUM_RCU_LVLS       1
 #  define NUM_RCU_LVL_0              1
 #  define NUM_RCU_LVL_1              (NR_CPUS)
 #  define NUM_RCU_LVL_2              0
 #  define NUM_RCU_LVL_3              0
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_SQ
+#elif NR_CPUS <= RCU_FANOUT_2
 #  define NUM_RCU_LVLS       2
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_2              (NR_CPUS)
 #  define NUM_RCU_LVL_3              0
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_CUBE
+#elif NR_CPUS <= RCU_FANOUT_3
 #  define NUM_RCU_LVLS       3
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
-#  define NUM_RCU_LVL_3              NR_CPUS
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_3              (NR_CPUS)
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_FOURTH
+#elif NR_CPUS <= RCU_FANOUT_4
 #  define NUM_RCU_LVLS       4
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
-#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
-#  define NUM_RCU_LVL_4              NR_CPUS
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_4              (NR_CPUS)
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT */
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
 
 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
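
A worked example of the level arithmetic above, assuming
CONFIG_RCU_FANOUT=32 (so the leaf level is capped at RCU_FANOUT_LEAF=16)
and NR_CPUS=4096, which lands in the three-level case:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		const int fanout = 32, leaf = 16, nr_cpus = 4096;
		const int f1 = leaf, f2 = f1 * fanout;
		/* f3 = f2 * 32 = 16384 >= nr_cpus, so NUM_RCU_LVLS == 3. */
		int lvl1 = DIV_ROUND_UP(nr_cpus, f2);  /* 8 interior nodes */
		int lvl2 = DIV_ROUND_UP(nr_cpus, f1);  /* 256 leaf nodes */

		printf("rcu_node structures: 1 + %d + %d = %d\n",
		       lvl1, lvl2, 1 + lvl1 + lvl2);
		return 0;
	}
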
@@ -203,8 +208,8 @@ struct rcu_data {
        long            qlen_last_fqs_check;
                                        /* qlen at last check for QS forcing */
        unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
-       unsigned long   n_cbs_orphaned; /* RCU cbs sent to orphanage. */
-       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from orphanage. */
+       unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
+       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
        unsigned long   n_force_qs_snap;
                                        /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
@@ -309,15 +314,7 @@ struct rcu_state {
        /* End of fields guarded by root rcu_node's lock. */
 
        raw_spinlock_t onofflock;               /* exclude on/offline and */
-                                               /*  starting new GP.  Also */
-                                               /*  protects the following */
-                                               /*  orphan_cbs fields. */
-       struct rcu_head *orphan_cbs_list;       /* list of rcu_head structs */
-                                               /*  orphaned by all CPUs in */
-                                               /*  a given leaf rcu_node */
-                                               /*  going offline. */
-       struct rcu_head **orphan_cbs_tail;      /* And tail pointer. */
-       long orphan_qlen;                       /* Number of orphaned cbs. */
+                                               /*  starting new GP. */
        raw_spinlock_t fqslock;                 /* Only one task forcing */
                                                /*  quiescent states. */
        unsigned long jiffies_force_qs;         /* Time at which to invoke */
@@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
-static void rcu_preempt_send_cbs_to_orphanage(void);
+static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
 
index 71a4147473f95f51d2b2e88db4c14372dafe375f..a3638710dc67f4627f5cdb88e1cafb43b500d24a 100644 (file)
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 /*
  * Check the RCU kernel configuration parameters and print informative
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ * Move preemptable RCU's callbacks from dying CPU to other online CPU.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
-       rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+       rcu_send_cbs_to_online(&rcu_preempt_state);
 }
 
 /*
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 /*
  * Because there is no preemptable RCU, there are no callbacks to move.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
 }
 
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifndef CONFIG_SMP
+
+void synchronize_sched_expedited(void)
+{
+       cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+       /*
+        * There must be a full memory barrier on each affected CPU
+        * between the time that try_stop_cpus() is called and the
+        * time that it returns.
+        *
+        * In the current initial implementation of cpu_stop, the
+        * above condition is already met when the control reaches
+        * above condition is already met when control reaches
+        * necessary.  Do smp_mb() anyway for documentation and
+        * robustness against future implementation changes.
+        */
+       smp_mb(); /* See above comment block. */
+       return 0;
+}
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+ * approach to force grace period to end quickly.  This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier.  Failing to
+ * observe this restriction will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word.  Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+       int firstsnap, s, snap, trycount = 0;
+
+       /* Note that atomic_inc_return() implies full memory barrier. */
+       firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+       get_online_cpus();
+
+       /*
+        * Each pass through the following loop attempts to force a
+        * context switch on each CPU.
+        */
+       while (try_stop_cpus(cpu_online_mask,
+                            synchronize_sched_expedited_cpu_stop,
+                            NULL) == -EAGAIN) {
+               put_online_cpus();
+
+               /* No joy, try again later.  Or just synchronize_sched(). */
+               if (trycount++ < 10)
+                       udelay(trycount * num_online_cpus());
+               else {
+                       synchronize_sched();
+                       return;
+               }
+
+               /* Check to see if someone else did our work for us. */
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       return;
+               }
+
+               /*
+                * Refetching sync_sched_expedited_started allows later
+                * callers to piggyback on our grace period.  We subtract
+                * 1 to get the same token that the last incrementer got.
+                * We retry after they started, so our grace period works
+                * for them, and they started after our first try, so their
+                * grace period works for us.
+                */
+               get_online_cpus();
+               snap = atomic_read(&sync_sched_expedited_started) - 1;
+               smp_mb(); /* ensure read is before try_stop_cpus(). */
+       }
+
+       /*
+        * Everyone up to our most recent fetch is covered by our grace
+        * period.  Update the counter, but only if our work is still
+        * relevant -- which it won't be if someone who started later
+        * than we did beat us to the punch.
+        */
+       do {
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       break;
+               }
+       } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
+       put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */
+
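
The ticket-lock protocol described in the comment block above can be seen in isolation in the following user-space sketch: C11 atomics stand in for the kernel's atomic_t, a stub stands in for try_stop_cpus(), and every identifier below is illustrative rather than kernel API.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int started;  /* role of sync_sched_expedited_started */
    static atomic_int done;     /* role of sync_sched_expedited_done */
    static int attempts;

    /* Stand-in for try_stop_cpus(): fail twice, then succeed. */
    static bool force_quiescent(void)
    {
        return ++attempts > 2;
    }

    /* Wraparound-safe "a >= b", like the kernel's UINT_CMP_GE(). */
    static bool uint_cmp_ge(unsigned int a, unsigned int b)
    {
        return !((a - b) & 0x80000000u);
    }

    static void expedited(void)
    {
        int firstsnap, s, snap;

        /* Take a ticket; the post-increment value is our token. */
        firstsnap = snap = atomic_fetch_add(&started, 1) + 1;

        while (!force_quiescent()) {
            /* Did a concurrent caller's grace period already cover us? */
            s = atomic_load(&done);
            if (uint_cmp_ge((unsigned int)s, (unsigned int)firstsnap))
                return;
            /* Refetch so later starters can piggyback, as in the kernel. */
            snap = atomic_load(&started) - 1;
        }

        /* Advance "done" to our snapshot unless someone moved past it. */
        do {
            s = atomic_load(&done);
            if (uint_cmp_ge((unsigned int)s, (unsigned int)snap))
                break;
        } while (!atomic_compare_exchange_weak(&done, &s, snap));
    }

    int main(void)
    {
        expedited();
        printf("started=%d done=%d\n",
               atomic_load(&started), atomic_load(&done));
        return 0;
    }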
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 
 /*
index d15430b9d122f4d619e76fb6b5069aa1f494a575..c8e97853b970f71ad662732ef46da011cf46ac1d 100644 (file)
@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 
        gpnum = rsp->gpnum;
        seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
-                     "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
+                     "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
                   rsp->completed, gpnum, rsp->signaled,
                   (long)(rsp->jiffies_force_qs - jiffies),
                   (int)(jiffies & 0xffff),
                   rsp->n_force_qs, rsp->n_force_qs_ngp,
                   rsp->n_force_qs - rsp->n_force_qs_ngp,
-                  rsp->n_force_qs_lh, rsp->orphan_qlen);
+                  rsp->n_force_qs_lh);
        for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
                if (rnp->level != level) {
                        seq_puts(m, "\n");
@@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = {
 
 static struct dentry *rcudir;
 
-static int __init rcuclassic_trace_init(void)
+static int __init rcutree_trace_init(void)
 {
        struct dentry *retval;
 
@@ -337,14 +337,14 @@ free_out:
        return 1;
 }
 
-static void __exit rcuclassic_trace_cleanup(void)
+static void __exit rcutree_trace_cleanup(void)
 {
        debugfs_remove_recursive(rcudir);
 }
 
 
-module_init(rcuclassic_trace_init);
-module_exit(rcuclassic_trace_cleanup);
+module_init(rcutree_trace_init);
+module_exit(rcutree_trace_cleanup);
 
 MODULE_AUTHOR("Paul E. McKenney");
 MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
index 297d1a0eedb0e68d8b9327f530ba477c93b1222e..04949089e7601ccd2a9b82f0f30c5905cbc9777b 100644 (file)
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -253,6 +255,8 @@ struct task_group {
        /* runqueue "owned" by this group on each cpu */
        struct cfs_rq **cfs_rq;
        unsigned long shares;
+
+       atomic_t load_weight;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -268,24 +272,19 @@ struct task_group {
        struct task_group *parent;
        struct list_head siblings;
        struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+       struct autogroup *autogroup;
+#endif
 };
 
 #define root_task_group init_task_group
 
-/* task_group_lock serializes add/remove of task groups and also changes to
- * a task group's cpu shares.
- */
+/* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-       return list_empty(&root_task_group.children);
-}
-#endif
-
 # define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
 
 /*
@@ -342,6 +341,7 @@ struct cfs_rq {
         * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
         * list is used during load balance.
         */
+       int on_list;
        struct list_head leaf_cfs_rq_list;
        struct task_group *tg;  /* group that "owns" this runqueue */
 
@@ -360,14 +360,17 @@ struct cfs_rq {
        unsigned long h_load;
 
        /*
-        * this cpu's part of tg->shares
+        * Maintaining per-cpu shares distribution for group scheduling
+        *
+        * load_stamp is the last time we updated the load average
+        * load_last is the last time we updated the load average and saw load
+        * load_unacc_exec_time is currently unaccounted execution time
         */
-       unsigned long shares;
+       u64 load_avg;
+       u64 load_period;
+       u64 load_stamp, load_last, load_unacc_exec_time;
 
-       /*
-        * load.weight at the time we set shares
-        */
-       unsigned long rq_weight;
+       unsigned long load_contribution;
 #endif
 #endif
 };
@@ -605,11 +608,14 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+       struct task_group *tg;
        struct cgroup_subsys_state *css;
 
        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                        lockdep_is_held(&task_rq(p)->lock));
-       return container_of(css, struct task_group, css);
+       tg = container_of(css, struct task_group, css);
+
+       return autogroup_task_group(p, tg);
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -792,20 +798,6 @@ late_initcall(sched_init_debug);
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * ratelimit for updating the group shares.
- * default: 0.25ms
- */
-unsigned int sysctl_sched_shares_ratelimit = 250000;
-unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
-
-/*
- * Inject some fuzzyness into changing the per-cpu group shares
- * this avoids remote rq-locks at the expense of fairness.
- * default: 4
- */
-unsigned int sysctl_sched_shares_thresh = 4;
-
 /*
  * period over which we average the RT time consumption, measured
  * in ms.
@@ -1355,6 +1347,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
        lw->inv_weight = 0;
 }
 
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+       lw->weight = w;
+       lw->inv_weight = 0;
+}
+
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -1543,101 +1541,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long __percpu *update_shares_data;
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void update_group_shares_cpu(struct task_group *tg, int cpu,
-                                   unsigned long sd_shares,
-                                   unsigned long sd_rq_weight,
-                                   unsigned long *usd_rq_weight)
-{
-       unsigned long shares, rq_weight;
-       int boost = 0;
-
-       rq_weight = usd_rq_weight[cpu];
-       if (!rq_weight) {
-               boost = 1;
-               rq_weight = NICE_0_LOAD;
-       }
-
-       /*
-        *             \Sum_j shares_j * rq_weight_i
-        * shares_i =  -----------------------------
-        *                  \Sum_j rq_weight_j
-        */
-       shares = (sd_shares * rq_weight) / sd_rq_weight;
-       shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-
-       if (abs(shares - tg->se[cpu]->load.weight) >
-                       sysctl_sched_shares_thresh) {
-               struct rq *rq = cpu_rq(cpu);
-               unsigned long flags;
-
-               raw_spin_lock_irqsave(&rq->lock, flags);
-               tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
-               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-               __set_se_shares(tg->se[cpu], shares);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
-       }
-}
-
-/*
- * Re-compute the task group their per cpu shares over the given domain.
- * This needs to be done in a bottom-up fashion because the rq weight of a
- * parent group depends on the shares of its child groups.
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-       unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
-       unsigned long *usd_rq_weight;
-       struct sched_domain *sd = data;
-       unsigned long flags;
-       int i;
-
-       if (!tg->se[0])
-               return 0;
-
-       local_irq_save(flags);
-       usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
-
-       for_each_cpu(i, sched_domain_span(sd)) {
-               weight = tg->cfs_rq[i]->load.weight;
-               usd_rq_weight[i] = weight;
-
-               rq_weight += weight;
-               /*
-                * If there are currently no tasks on the cpu pretend there
-                * is one of average load so that when a new task gets to
-                * run here it will not get delayed by group starvation.
-                */
-               if (!weight)
-                       weight = NICE_0_LOAD;
-
-               sum_weight += weight;
-               shares += tg->cfs_rq[i]->shares;
-       }
-
-       if (!rq_weight)
-               rq_weight = sum_weight;
-
-       if ((!shares && rq_weight) || shares > tg->shares)
-               shares = tg->shares;
-
-       if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
-               shares = tg->shares;
-
-       for_each_cpu(i, sched_domain_span(sd))
-               update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
-
-       local_irq_restore(flags);
-
-       return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
  * This needs to be done in a top-down fashion because the load of a child
@@ -1652,7 +1555,7 @@ static int tg_load_down(struct task_group *tg, void *data)
                load = cpu_rq(cpu)->load.weight;
        } else {
                load = tg->parent->cfs_rq[cpu]->h_load;
-               load *= tg->cfs_rq[cpu]->shares;
+               load *= tg->se[cpu]->load.weight;
                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
        }
 
@@ -1661,34 +1564,11 @@ static int tg_load_down(struct task_group *tg, void *data)
        return 0;
 }
 
-static void update_shares(struct sched_domain *sd)
-{
-       s64 elapsed;
-       u64 now;
-
-       if (root_task_group_empty())
-               return;
-
-       now = local_clock();
-       elapsed = now - sd->last_update;
-
-       if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
-               sd->last_update = now;
-               walk_tg_tree(tg_nop, tg_shares_up, sd);
-       }
-}
-
 static void update_h_load(long cpu)
 {
        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(struct sched_domain *sd)
-{
-}
-
 #endif
 
 #ifdef CONFIG_PREEMPT
@@ -1810,15 +1690,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-#ifdef CONFIG_SMP
-       cfs_rq->shares = shares;
-#endif
-}
-#endif
-
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
@@ -2063,6 +1934,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #include "sched_stoptask.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -2255,10 +2127,8 @@ static int migration_cpu_stop(void *data);
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static bool migrate_task(struct task_struct *p, int dest_cpu)
+static bool migrate_task(struct task_struct *p, struct rq *rq)
 {
-       struct rq *rq = task_rq(p);
-
        /*
         * If the task is not on a runqueue (and not running), then
         * the next wake-up will properly place the task.
@@ -2438,18 +2308,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
                return dest_cpu;
 
        /* No more Mr. Nice Guy. */
-       if (unlikely(dest_cpu >= nr_cpu_ids)) {
-               dest_cpu = cpuset_cpus_allowed_fallback(p);
-               /*
-                * Don't tell them about moving exiting tasks or
-                * kernel threads (both mm NULL), since they never
-                * leave kernel.
-                */
-               if (p->mm && printk_ratelimit()) {
-                       printk(KERN_INFO "process %d (%s) no "
-                              "longer affine to cpu%d\n",
-                              task_pid_nr(p), p->comm, cpu);
-               }
+       dest_cpu = cpuset_cpus_allowed_fallback(p);
+       /*
+        * Don't tell them about moving exiting tasks or
+        * kernel threads (both mm NULL), since they never
+        * leave kernel.
+        */
+       if (p->mm && printk_ratelimit()) {
+               printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+                               task_pid_nr(p), p->comm, cpu);
        }
 
        return dest_cpu;
@@ -2785,7 +2652,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
 #endif
+#ifdef CONFIG_SMP
        plist_node_init(&p->pushable_tasks, MAX_PRIO);
+#endif
 
        put_cpu();
 }
@@ -3549,7 +3418,7 @@ void sched_exec(void)
         * select_task_rq() can race against ->cpus_allowed
         */
        if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-           likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+           likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
                struct migration_arg arg = { p, dest_cpu };
 
                task_rq_unlock(rq, &flags);
@@ -4214,7 +4083,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
                if (task_thread_info(rq->curr) != owner || need_resched())
                        return 0;
 
-               cpu_relax();
+               arch_mutex_cpu_relax();
        }
 
        return 1;
@@ -4526,7 +4395,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
                                          unsigned long timeout)
 {
@@ -4559,7 +4428,7 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_killable_timeout(struct completion *x,
                                     unsigned long timeout)
 {
@@ -4901,7 +4770,7 @@ static bool check_same_owner(struct task_struct *p)
 }
 
 static int __sched_setscheduler(struct task_struct *p, int policy,
-                               struct sched_param *param, bool user)
+                               const struct sched_param *param, bool user)
 {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
        unsigned long flags;
@@ -5056,7 +4925,7 @@ recheck:
  * NOTE that the task may be already dead.
  */
 int sched_setscheduler(struct task_struct *p, int policy,
-                      struct sched_param *param)
+                      const struct sched_param *param)
 {
        return __sched_setscheduler(p, policy, param, true);
 }
@@ -5074,7 +4943,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
  * but our caller might not have that capability.
  */
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
-                              struct sched_param *param)
+                              const struct sched_param *param)
 {
        return __sched_setscheduler(p, policy, param, false);
 }
@@ -5590,7 +5459,7 @@ void sched_show_task(struct task_struct *p)
        unsigned state;
 
        state = p->state ? __ffs(p->state) + 1 : 0;
-       printk(KERN_INFO "%-13.13s %c", p->comm,
+       printk(KERN_INFO "%-15.15s %c", p->comm,
                state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
        if (state == TASK_RUNNING)
@@ -5754,7 +5623,6 @@ static void update_sysctl(void)
        SET_SYSCTL(sched_min_granularity);
        SET_SYSCTL(sched_latency);
        SET_SYSCTL(sched_wakeup_granularity);
-       SET_SYSCTL(sched_shares_ratelimit);
 #undef SET_SYSCTL
 }
 
@@ -5830,7 +5698,7 @@ again:
                goto out;
 
        dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-       if (migrate_task(p, dest_cpu)) {
+       if (migrate_task(p, rq)) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
                task_rq_unlock(rq, &flags);
@@ -5912,29 +5780,20 @@ static int migration_cpu_stop(void *data)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
 /*
- * Figure out where task on dead CPU should go, use force if necessary.
+ * Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
  */
-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void idle_task_exit(void)
 {
-       struct rq *rq = cpu_rq(dead_cpu);
-       int needs_cpu, uninitialized_var(dest_cpu);
-       unsigned long flags;
+       struct mm_struct *mm = current->active_mm;
 
-       local_irq_save(flags);
+       BUG_ON(cpu_online(smp_processor_id()));
 
-       raw_spin_lock(&rq->lock);
-       needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
-       if (needs_cpu)
-               dest_cpu = select_fallback_rq(dead_cpu, p);
-       raw_spin_unlock(&rq->lock);
-       /*
-        * It can only fail if we race with set_cpus_allowed(),
-        * in the racer should migrate the task anyway.
-        */
-       if (needs_cpu)
-               __migrate_task(p, dead_cpu, dest_cpu);
-       local_irq_restore(flags);
+       if (mm != &init_mm)
+               switch_mm(mm, &init_mm, current);
+       mmdrop(mm);
 }
 
 /*
@@ -5947,128 +5806,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
        struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-       unsigned long flags;
 
-       local_irq_save(flags);
-       double_rq_lock(rq_src, rq_dest);
        rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
        rq_src->nr_uninterruptible = 0;
-       double_rq_unlock(rq_src, rq_dest);
-       local_irq_restore(flags);
-}
-
-/* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
-{
-       struct task_struct *p, *t;
-
-       read_lock(&tasklist_lock);
-
-       do_each_thread(t, p) {
-               if (p == current)
-                       continue;
-
-               if (task_cpu(p) == src_cpu)
-                       move_task_off_dead_cpu(src_cpu, p);
-       } while_each_thread(t, p);
-
-       read_unlock(&tasklist_lock);
 }
 
 /*
- * Schedules idle task to be the next runnable task on current CPU.
- * It does so by boosting its priority to highest possible.
- * Used by CPU offline code.
+ * Remove the tasks which were accounted by rq from calc_load_tasks.
  */
-void sched_idle_next(void)
+static void calc_global_load_remove(struct rq *rq)
 {
-       int this_cpu = smp_processor_id();
-       struct rq *rq = cpu_rq(this_cpu);
-       struct task_struct *p = rq->idle;
-       unsigned long flags;
-
-       /* cpu has to be offline */
-       BUG_ON(cpu_online(this_cpu));
-
-       /*
-        * Strictly not necessary since rest of the CPUs are stopped by now
-        * and interrupts disabled on the current cpu.
-        */
-       raw_spin_lock_irqsave(&rq->lock, flags);
-
-       __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-
-       activate_task(rq, p, 0);
-
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+       rq->calc_load_active = 0;
 }
 
 /*
- * Ensures that the idle task is using init_mm right before its cpu goes
- * offline.
+ * Migrate all tasks from the rq; sleeping tasks will be migrated by
+ * try_to_wake_up()->select_task_rq().
+ *
+ * Called with rq->lock held even though we're in stop_machine() and
+ * there's no concurrency possible; we hold the required locks anyway
+ * because of lock validation efforts.
  */
-void idle_task_exit(void)
-{
-       struct mm_struct *mm = current->active_mm;
-
-       BUG_ON(cpu_online(smp_processor_id()));
-
-       if (mm != &init_mm)
-               switch_mm(mm, &init_mm, current);
-       mmdrop(mm);
-}
-
-/* called under rq->lock with disabled interrupts */
-static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
+static void migrate_tasks(unsigned int dead_cpu)
 {
        struct rq *rq = cpu_rq(dead_cpu);
-
-       /* Must be exiting, otherwise would be on tasklist. */
-       BUG_ON(!p->exit_state);
-
-       /* Cannot have done final schedule yet: would have vanished. */
-       BUG_ON(p->state == TASK_DEAD);
-
-       get_task_struct(p);
+       struct task_struct *next, *stop = rq->stop;
+       int dest_cpu;
 
        /*
-        * Drop lock around migration; if someone else moves it,
-        * that's OK. No task can be added to this CPU, so iteration is
-        * fine.
+        * Fudge the rq selection such that the below task selection loop
+        * doesn't get stuck on the currently eligible stop task.
+        *
+        * We're currently inside stop_machine() and the rq is either stuck
+        * in the stop_machine_cpu_stop() loop or we're executing this code;
+        * either way we should never end up calling schedule() until we're
+        * done here.
         */
-       raw_spin_unlock_irq(&rq->lock);
-       move_task_off_dead_cpu(dead_cpu, p);
-       raw_spin_lock_irq(&rq->lock);
-
-       put_task_struct(p);
-}
-
-/* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
-{
-       struct rq *rq = cpu_rq(dead_cpu);
-       struct task_struct *next;
+       rq->stop = NULL;
 
        for ( ; ; ) {
-               if (!rq->nr_running)
+               /*
+                * There's this thread running; bail when that's the only
+                * remaining thread.
+                */
+               if (rq->nr_running == 1)
                        break;
+
                next = pick_next_task(rq);
-               if (!next)
-                       break;
+               BUG_ON(!next);
                next->sched_class->put_prev_task(rq, next);
-               migrate_dead(dead_cpu, next);
 
+               /* Find suitable destination for @next, with force if needed. */
+               dest_cpu = select_fallback_rq(dead_cpu, next);
+               raw_spin_unlock(&rq->lock);
+
+               __migrate_task(next, dead_cpu, dest_cpu);
+
+               raw_spin_lock(&rq->lock);
        }
-}
 
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
- */
-static void calc_global_load_remove(struct rq *rq)
-{
-       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-       rq->calc_load_active = 0;
+       rq->stop = stop;
 }
+
 #endif /* CONFIG_HOTPLUG_CPU */
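
The migrate_tasks() loop above is a drain pattern: hide the stop task, repeatedly pick the next runnable task, and move it until only the caller remains. A toy user-space analogue of the drain itself, with a plain array standing in for the runqueue and all locking omitted:

    #include <stdio.h>

    #define MAX_TASKS 8

    struct rq_sim {
        int tasks[MAX_TASKS];   /* fake "runnable task" ids */
        int nr_running;
    };

    /* Stand-in for pick_next_task(): take the last queued id. */
    static int pick_next(struct rq_sim *rq)
    {
        return rq->tasks[--rq->nr_running];
    }

    /* Drain all but one "current" task from src to dst. */
    static void migrate_all(struct rq_sim *src, struct rq_sim *dst)
    {
        while (src->nr_running > 1) {   /* leave the caller itself behind */
            int id = pick_next(src);
            dst->tasks[dst->nr_running++] = id;
            printf("migrated task %d\n", id);
        }
    }

    int main(void)
    {
        struct rq_sim src = { { 1, 2, 3, 4 }, 4 }, dst = { { 0 }, 0 };

        migrate_all(&src, &dst);
        printf("src left with %d task(s)\n", src.nr_running);
        return 0;
    }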
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -6278,15 +6078,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
        unsigned long flags;
        struct rq *rq = cpu_rq(cpu);
 
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
 
        case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
                rq->calc_load_update = calc_load_update;
                break;
 
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
                if (rq->rd) {
@@ -6298,30 +6096,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               migrate_live_tasks(cpu);
-               /* Idle task back to normal (off runqueue, low prio) */
-               raw_spin_lock_irq(&rq->lock);
-               deactivate_task(rq, rq->idle, 0);
-               __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
-               rq->idle->sched_class = &idle_sched_class;
-               migrate_dead_tasks(cpu);
-               raw_spin_unlock_irq(&rq->lock);
-               migrate_nr_uninterruptible(rq);
-               BUG_ON(rq->nr_running != 0);
-               calc_global_load_remove(rq);
-               break;
-
        case CPU_DYING:
-       case CPU_DYING_FROZEN:
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                        set_rq_offline(rq);
                }
+               migrate_tasks(cpu);
+               BUG_ON(rq->nr_running != 1); /* the migration thread */
                raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+               migrate_nr_uninterruptible(rq);
+               calc_global_load_remove(rq);
                break;
 #endif
        }
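
The `action & ~CPU_TASKS_FROZEN` switch above works because each CPU_*_FROZEN notifier value is its base value with the CPU_TASKS_FROZEN bit ORed in; masking the bit lets one case label serve both variants. A stand-alone illustration of the idiom (the constants here are invented for the demo, not the kernel's real values):

    #include <stdio.h>

    #define TASKS_FROZEN   0x10   /* demo flag bit, not the kernel's value */
    #define UP_PREPARE     0x03
    #define UP_PREPARE_FRZ (UP_PREPARE | TASKS_FROZEN)

    static const char *decode(unsigned long action)
    {
        switch (action & ~TASKS_FROZEN) { /* one label covers both variants */
        case UP_PREPARE:
            return "up-prepare";
        default:
            return "other";
        }
    }

    int main(void)
    {
        printf("%s\n", decode(UP_PREPARE));     /* up-prepare */
        printf("%s\n", decode(UP_PREPARE_FRZ)); /* up-prepare */
        return 0;
    }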
@@ -8052,15 +7839,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
-                               struct sched_entity *se, int cpu, int add,
+                               struct sched_entity *se, int cpu,
                                struct sched_entity *parent)
 {
        struct rq *rq = cpu_rq(cpu);
        tg->cfs_rq[cpu] = cfs_rq;
        init_cfs_rq(cfs_rq, rq);
        cfs_rq->tg = tg;
-       if (add)
-               list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
        tg->se[cpu] = se;
        /* se could be NULL for init_task_group */
@@ -8073,15 +7858,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                se->cfs_rq = parent->my_q;
 
        se->my_q = cfs_rq;
-       se->load.weight = tg->shares;
-       se->load.inv_weight = 0;
+       update_load_set(&se->load, 0);
        se->parent = parent;
 }
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
-               struct sched_rt_entity *rt_se, int cpu, int add,
+               struct sched_rt_entity *rt_se, int cpu,
                struct sched_rt_entity *parent)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -8090,8 +7874,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
        init_rt_rq(rt_rq, rq);
        rt_rq->tg = tg;
        rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
-       if (add)
-               list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 
        tg->rt_se[cpu] = rt_se;
        if (!rt_se)
@@ -8164,13 +7946,9 @@ void __init sched_init(void)
 #ifdef CONFIG_CGROUP_SCHED
        list_add(&init_task_group.list, &task_groups);
        INIT_LIST_HEAD(&init_task_group.children);
-
+       autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-       update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
-                                           __alignof__(unsigned long));
-#endif
        for_each_possible_cpu(i) {
                struct rq *rq;
 
@@ -8184,7 +7962,6 @@ void __init sched_init(void)
 #ifdef CONFIG_FAIR_GROUP_SCHED
                init_task_group.shares = init_task_group_load;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
                /*
                 * How much cpu bandwidth does init_task_group get?
                 *
@@ -8204,16 +7981,13 @@ void __init sched_init(void)
                 * We achieve this by letting init_task_group's tasks sit
                 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
                 */
-               init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#endif
+               init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
                rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
                INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
-               init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
-#endif
+               init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
                for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8293,8 +8067,6 @@ void __init sched_init(void)
                zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-       perf_event_init();
-
        scheduler_running = 1;
 }
 
@@ -8488,7 +8260,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
                if (!se)
                        goto err_free_rq;
 
-               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
+               init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
        }
 
        return 1;
@@ -8499,15 +8271,21 @@ err:
        return 0;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-       list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
-                       &cpu_rq(cpu)->leaf_cfs_rq_list);
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
-       list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+
+       /*
+        * Only empty task groups can be destroyed, so we can speculatively
+        * check on_list without danger of it being re-added.
+       */
+       if (!tg->cfs_rq[cpu]->on_list)
+               return;
+
+       raw_spin_lock_irqsave(&rq->lock, flags);
+       list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 #else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
@@ -8520,10 +8298,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
        return 1;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
 }
@@ -8578,7 +8352,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
                if (!rt_se)
                        goto err_free_rq;
 
-               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
+               init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }
 
        return 1;
@@ -8588,17 +8362,6 @@ err_free_rq:
 err:
        return 0;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-       list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
-                       &cpu_rq(cpu)->leaf_rt_rq_list);
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-       list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
-}
 #else /* !CONFIG_RT_GROUP_SCHED */
 static inline void free_rt_sched_group(struct task_group *tg)
 {
@@ -8609,14 +8372,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
        return 1;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -8632,7 +8387,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 {
        struct task_group *tg;
        unsigned long flags;
-       int i;
 
        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
        if (!tg)
@@ -8645,10 +8399,6 @@ struct task_group *sched_create_group(struct task_group *parent)
                goto err;
 
        spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               register_fair_sched_group(tg, i);
-               register_rt_sched_group(tg, i);
-       }
        list_add_rcu(&tg->list, &task_groups);
 
        WARN_ON(!parent); /* root should already exist */
@@ -8678,11 +8428,11 @@ void sched_destroy_group(struct task_group *tg)
        unsigned long flags;
        int i;
 
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
+       /* end participation in shares distribution */
+       for_each_possible_cpu(i)
                unregister_fair_sched_group(tg, i);
-               unregister_rt_sched_group(tg, i);
-       }
+
+       spin_lock_irqsave(&task_group_lock, flags);
        list_del_rcu(&tg->list);
        list_del_rcu(&tg->siblings);
        spin_unlock_irqrestore(&task_group_lock, flags);
@@ -8729,33 +8479,6 @@ void sched_move_task(struct task_struct *tsk)
 #endif /* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void __set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       int on_rq;
-
-       on_rq = se->on_rq;
-       if (on_rq)
-               dequeue_entity(cfs_rq, se, 0);
-
-       se->load.weight = shares;
-       se->load.inv_weight = 0;
-
-       if (on_rq)
-               enqueue_entity(cfs_rq, se, 0);
-}
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       struct rq *rq = cfs_rq->rq;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-       __set_se_shares(se, shares);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
 static DEFINE_MUTEX(shares_mutex);
 
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
@@ -8778,37 +8501,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
        if (tg->shares == shares)
                goto done;
 
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i)
-               unregister_fair_sched_group(tg, i);
-       list_del_rcu(&tg->siblings);
-       spin_unlock_irqrestore(&task_group_lock, flags);
-
-       /* wait for any ongoing reference to this group to finish */
-       synchronize_sched();
-
-       /*
-        * Now we are free to modify the group's share on each cpu
-        * w/o tripping rebalance_share or load_balance_fair.
-        */
        tg->shares = shares;
        for_each_possible_cpu(i) {
-               /*
-                * force a rebalance
-                */
-               cfs_rq_set_shares(tg->cfs_rq[i], 0);
-               set_se_shares(tg->se[i], shares);
+               struct rq *rq = cpu_rq(i);
+               struct sched_entity *se;
+
+               se = tg->se[i];
+               /* Propagate contribution to hierarchy */
+               raw_spin_lock_irqsave(&rq->lock, flags);
+               for_each_sched_entity(se)
+                       update_cfs_shares(group_cfs_rq(se), 0);
+               raw_spin_unlock_irqrestore(&rq->lock, flags);
        }
 
-       /*
-        * Enable load balance activity on this group, by inserting it back on
-        * each cpu's rq->leaf_cfs_rq_list.
-        */
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i)
-               register_fair_sched_group(tg, i);
-       list_add_rcu(&tg->siblings, &tg->parent->children);
-       spin_unlock_irqrestore(&task_group_lock, flags);
 done:
        mutex_unlock(&shares_mutex);
        return 0;
@@ -9534,72 +9239,3 @@ struct cgroup_subsys cpuacct_subsys = {
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
 
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-       barrier();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-       /*
-        * There must be a full memory barrier on each affected CPU
-        * between the time that try_stop_cpus() is called and the
-        * time that it returns.
-        *
-        * In the current initial implementation of cpu_stop, the
-        * above condition is already met when the control reaches
-        * this point and the following smp_mb() is not strictly
-        * necessary.  Do smp_mb() anyway for documentation and
-        * robustness against future implementation changes.
-        */
-       smp_mb(); /* See above comment block. */
-       return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly.  This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier.  Failing to
- * observe this restriction will result in deadlock.
- */
-void synchronize_sched_expedited(void)
-{
-       int snap, trycount = 0;
-
-       smp_mb();  /* ensure prior mod happens before capturing snap. */
-       snap = atomic_read(&synchronize_sched_expedited_count) + 1;
-       get_online_cpus();
-       while (try_stop_cpus(cpu_online_mask,
-                            synchronize_sched_expedited_cpu_stop,
-                            NULL) == -EAGAIN) {
-               put_online_cpus();
-               if (trycount++ < 10)
-                       udelay(trycount * num_online_cpus());
-               else {
-                       synchronize_sched();
-                       return;
-               }
-               if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
-                       smp_mb(); /* ensure test happens before caller kfree */
-                       return;
-               }
-               get_online_cpus();
-       }
-       atomic_inc(&synchronize_sched_expedited_count);
-       smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
-       put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644 (file)
index 0000000..c80fedc
--- /dev/null
@@ -0,0 +1,238 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/utsname.h>
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+static struct autogroup autogroup_default;
+static atomic_t autogroup_seq_nr;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+       autogroup_default.tg = &init_task_group;
+       init_task_group.autogroup = &autogroup_default;
+       kref_init(&autogroup_default.kref);
+       init_rwsem(&autogroup_default.lock);
+       init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_free(struct task_group *tg)
+{
+       kfree(tg->autogroup);
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+       struct autogroup *ag = container_of(kref, struct autogroup, kref);
+
+       sched_destroy_group(ag->tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+       kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+       kref_get(&ag->kref);
+       return ag;
+}
+
+static inline struct autogroup *autogroup_task_get(struct task_struct *p)
+{
+       struct autogroup *ag;
+       unsigned long flags;
+
+       if (!lock_task_sighand(p, &flags))
+               return autogroup_kref_get(&autogroup_default);
+
+       ag = autogroup_kref_get(p->signal->autogroup);
+       unlock_task_sighand(p, &flags);
+
+       return ag;
+}
+
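
The kref_get()/kref_put() pairing used throughout this file is the kernel's usual reference-counting pattern: the final put invokes the release callback. Roughly equivalent user-space code, sketched with a bare C11 atomic counter (the real kref does more, e.g. warning on zero-count gets):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct ref {
        atomic_int count;
        void (*release)(struct ref *r);
    };

    static void ref_init(struct ref *r, void (*release)(struct ref *))
    {
        atomic_init(&r->count, 1);
        r->release = release;
    }

    static void ref_get(struct ref *r)
    {
        atomic_fetch_add(&r->count, 1);
    }

    static void ref_put(struct ref *r)
    {
        if (atomic_fetch_sub(&r->count, 1) == 1)
            r->release(r);  /* last reference dropped */
    }

    struct group {
        struct ref ref;     /* must stay the first member for the cast */
        int id;
    };

    static void group_release(struct ref *r)
    {
        struct group *g = (struct group *)r;
        printf("destroying group %d\n", g->id);
        free(g);
    }

    int main(void)
    {
        struct group *g = malloc(sizeof(*g));

        if (!g)
            return 1;
        g->id = 1;
        ref_init(&g->ref, group_release);
        ref_get(&g->ref);   /* a second user takes a reference */
        ref_put(&g->ref);
        ref_put(&g->ref);   /* triggers group_release() */
        return 0;
    }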
+static inline struct autogroup *autogroup_create(void)
+{
+       struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
+       struct task_group *tg;
+
+       if (!ag)
+               goto out_fail;
+
+       tg = sched_create_group(&init_task_group);
+
+       if (IS_ERR(tg))
+               goto out_free;
+
+       kref_init(&ag->kref);
+       init_rwsem(&ag->lock);
+       ag->id = atomic_inc_return(&autogroup_seq_nr);
+       ag->tg = tg;
+       tg->autogroup = ag;
+
+       return ag;
+
+out_free:
+       kfree(ag);
+out_fail:
+       if (printk_ratelimit()) {
+               printk(KERN_WARNING "autogroup_create: %s failure.\n",
+                       ag ? "sched_create_group()" : "kmalloc()");
+       }
+
+       return autogroup_kref_get(&autogroup_default);
+}
+
+static inline bool
+task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+{
+       if (tg != &root_task_group)
+               return false;
+
+       if (p->sched_class != &fair_sched_class)
+               return false;
+
+       /*
+        * We can only assume the task group can't go away on us if
+        * autogroup_move_group() can see us on ->thread_group list.
+        */
+       if (p->flags & PF_EXITING)
+               return false;
+
+       return true;
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+       int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+       if (enabled && task_wants_autogroup(p, tg))
+               return p->signal->autogroup->tg;
+
+       return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+       struct autogroup *prev;
+       struct task_struct *t;
+       unsigned long flags;
+
+       BUG_ON(!lock_task_sighand(p, &flags));
+
+       prev = p->signal->autogroup;
+       if (prev == ag) {
+               unlock_task_sighand(p, &flags);
+               return;
+       }
+
+       p->signal->autogroup = autogroup_kref_get(ag);
+
+       t = p;
+       do {
+               sched_move_task(t);
+       } while_each_thread(p, t);
+
+       unlock_task_sighand(p, &flags);
+       autogroup_kref_put(prev);
+}
+
+/* Allocates with GFP_KERNEL, so this cannot be called under any spinlock */
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+       struct autogroup *ag = autogroup_create();
+
+       autogroup_move_group(p, ag);
+       /* drop extra reference added by autogroup_create() */
+       autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* Cannot be called under siglock.  Currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+       autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+       sig->autogroup = autogroup_task_get(current);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+       autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+       sysctl_sched_autogroup_enabled = 0;
+
+       return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+
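
__setup("noautogroup", ...) arranges for setup_autogroup() to run when that keyword appears on the kernel command line, turning the default-on feature off. A toy user-space analogue of such a keyword/handler table, with all names invented for the demo:

    #include <stdio.h>
    #include <string.h>

    static unsigned int autogroup_enabled = 1; /* default on, like the sysctl */

    static int setup_noautogroup(const char *arg)
    {
        (void)arg;
        autogroup_enabled = 0;
        return 1;
    }

    struct param {
        const char *name;
        int (*handler)(const char *arg);
    };

    static const struct param params[] = {
        { "noautogroup", setup_noautogroup },
    };

    static void parse_cmdline(int argc, char **argv)
    {
        for (int i = 1; i < argc; i++)
            for (size_t j = 0; j < sizeof(params) / sizeof(params[0]); j++)
                if (!strcmp(argv[i], params[j].name))
                    params[j].handler(argv[i]);
    }

    int main(int argc, char **argv)
    {
        parse_cmdline(argc, argv);
        printf("autogroup %s\n", autogroup_enabled ? "enabled" : "disabled");
        return 0;
    }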
+#ifdef CONFIG_PROC_FS
+
+int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+{
+       static unsigned long next = INITIAL_JIFFIES;
+       struct autogroup *ag;
+       int err;
+
+       if (*nice < -20 || *nice > 19)
+               return -EINVAL;
+
+       err = security_task_setnice(current, *nice);
+       if (err)
+               return err;
+
+       if (*nice < 0 && !can_nice(current, *nice))
+               return -EPERM;
+
+       /* this is a heavy operation, taking global locks... */
+       if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
+               return -EAGAIN;
+
+       next = HZ / 10 + jiffies;
+       ag = autogroup_task_get(p);
+
+       down_write(&ag->lock);
+       err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+       if (!err)
+               ag->nice = *nice;
+       up_write(&ag->lock);
+
+       autogroup_kref_put(ag);
+
+       return err;
+}
+
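
The time_before(jiffies, next) test above throttles unprivileged callers to one accepted request per HZ/10, i.e. 100ms. The same throttle sketched in user space against a POSIX monotonic clock (the kernel version additionally survives jiffies wraparound via time_before()):

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    /* Allow at most one operation per 100ms window, like HZ/10 above. */
    static bool rate_limit_ok(void)
    {
        static long next_ms;
        struct timespec ts;
        long now_ms;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        now_ms = ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;

        if (now_ms < next_ms)
            return false;   /* corresponds to returning -EAGAIN */

        next_ms = now_ms + 100;
        return true;
    }

    int main(void)
    {
        printf("%d\n", rate_limit_ok());    /* 1: first call passes */
        printf("%d\n", rate_limit_ok());    /* 0: throttled */
        return 0;
    }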
+void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
+{
+       struct autogroup *ag = autogroup_task_get(p);
+
+       down_read(&ag->lock);
+       seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
+       up_read(&ag->lock);
+
+       autogroup_kref_put(ag);
+}
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+       return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
+}
+#endif /* CONFIG_SCHED_DEBUG */
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644 (file)
index 0000000..5358e24
--- /dev/null
@@ -0,0 +1,32 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+struct autogroup {
+       struct kref             kref;
+       struct task_group       *tg;
+       struct rw_semaphore     lock;
+       unsigned long           id;
+       int                     nice;
+};
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) {  }
+static inline void autogroup_free(struct task_group *tg) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+       return tg;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+       return 0;
+}
+#endif
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
index 52f1a149bfb15a871a362255498fadf90e357c57..9d8af0b3fb64544d9ca7076f3478d2239b46540e 100644 (file)
@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 }
 EXPORT_SYMBOL_GPL(sched_clock);
 
-static __read_mostly int sched_clock_running;
+__read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 __read_mostly int sched_clock_stable;
index 2e1b0d17dd9b6a8b4ac48891a988c025a8b07ed5..1dfae3d014b5934eba4b3be25bfbad746196b4b7 100644 (file)
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec)
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void print_cfs_group_stats(struct seq_file *m, int cpu,
-               struct task_group *tg)
+static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 {
        struct sched_entity *se = tg->se[cpu];
        if (!se)
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
                0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
 
-#ifdef CONFIG_CGROUP_SCHED
-       {
-               char path[64];
-
-               rcu_read_lock();
-               cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
-               rcu_read_unlock();
-               SEQ_printf(m, " %s", path);
-       }
-#endif
        SEQ_printf(m, "\n");
 }
 
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
        read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
-#if defined(CONFIG_CGROUP_SCHED) && \
-       (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
-static void task_group_path(struct task_group *tg, char *buf, int buflen)
-{
-       /* may be NULL if the underlying cgroup isn't fully-created yet */
-       if (!tg->css.cgroup) {
-               buf[0] = '\0';
-               return;
-       }
-       cgroup_path(tg->css.cgroup, buf, buflen);
-}
-#endif
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
        s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        struct sched_entity *last;
        unsigned long flags;
 
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
-       char path[128];
-       struct task_group *tg = cfs_rq->tg;
-
-       task_group_path(tg, path, sizeof(path));
-
-       SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
-#else
        SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
-#endif
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
                        SPLIT_NS(cfs_rq->exec_clock));
 
@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        spread0 = min_vruntime - rq0_min_vruntime;
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
                        SPLIT_NS(spread0));
-       SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
-       SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
-
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
                        cfs_rq->nr_spread_over);
+       SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
+       SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
-       SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
+                       SPLIT_NS(cfs_rq->load_avg));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
+                       SPLIT_NS(cfs_rq->load_period));
+       SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
+                       cfs_rq->load_contribution);
+       SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
+                       atomic_read(&cfs_rq->tg->load_weight));
 #endif
+
        print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
-       char path[128];
-       struct task_group *tg = rt_rq->tg;
-
-       task_group_path(tg, path, sizeof(path));
-
-       SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
-#else
        SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
-#endif
-
 
 #define P(x) \
        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 #undef P
 }
 
+extern __read_mostly int sched_clock_running;
+
 static void print_cpu(struct seq_file *m, int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = {
 
 static int sched_debug_show(struct seq_file *m, void *v)
 {
-       u64 now = ktime_to_ns(ktime_get());
+       u64 ktime, sched_clk, cpu_clk;
+       unsigned long flags;
        int cpu;
 
-       SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
+       local_irq_save(flags);
+       ktime = ktime_to_ns(ktime_get());
+       sched_clk = sched_clock();
+       cpu_clk = local_clock();
+       local_irq_restore(flags);
+
+       SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
 
-       SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
+#define P(x) \
+       SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
+#define PN(x) \
+       SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+       PN(ktime);
+       PN(sched_clk);
+       PN(cpu_clk);
+       P(jiffies);
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+       P(sched_clock_stable);
+#endif
+#undef PN
+#undef P
+
+       SEQ_printf(m, "\n");
+       SEQ_printf(m, "sysctl_sched\n");
 
 #define P(x) \
        SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
        SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-       P(jiffies);
        PN(sysctl_sched_latency);
        PN(sysctl_sched_min_granularity);
        PN(sysctl_sched_wakeup_granularity);
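
The P()/PN() macros above rely on the preprocessor's stringizing operator: #x turns an expression into its own printed label. A compact stand-alone demonstration, with a simplified SPLIT_NS-style split (no negative handling; the sample values are invented):

    #include <stdio.h>

    /* Print a value labelled with its own source-level name via #x. */
    #define P(x)  printf("%-40s: %lld\n", #x, (long long)(x))
    /* Print nanoseconds as msec.remainder, like SPLIT_NS() above. */
    #define PN(x) printf("%-40s: %lld.%06lld\n", #x, \
                         (long long)((x) / 1000000), (long long)((x) % 1000000))

    int main(void)
    {
        long long jiffies = 4294937296LL;     /* sample value */
        long long cpu_clk = 123456789012LL;   /* sample value, in ns */

        P(jiffies);
        PN(cpu_clk);    /* prints "cpu_clk ... : 123456.789012" */
        return 0;
    }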
index 00ebd7686676bd87a6e5b3be513d27028e518796..c62ebae65cf0c5e5d1628b0368692a94cda37568 100644 (file)
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+/*
+ * The exponential sliding window over which load is averaged for shares
+ * distribution.
+ * (default: 10msec)
+ */
+unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
+
 static const struct sched_class fair_sched_class;
 
 /**************************************************************
@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
        return cfs_rq->tg->cfs_rq[this_cpu];
 }
 
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+       if (!cfs_rq->on_list) {
+               /*
+                * Ensure we either appear before our parent (if already
+                * enqueued) or force our parent to appear after us when it is
+                * enqueued.  The fact that we always enqueue bottom-up
+                * reduces this to two cases.
+                */
+               if (cfs_rq->tg->parent &&
+                   cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
+                       list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+                               &rq_of(cfs_rq)->leaf_cfs_rq_list);
+               } else {
+                       list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+                               &rq_of(cfs_rq)->leaf_cfs_rq_list);
+               }
+
+               cfs_rq->on_list = 1;
+       }
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->on_list) {
+               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+               cfs_rq->on_list = 0;
+       }
+}
+
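
list_add_leaf_cfs_rq() above preserves an ordering invariant: a child cfs_rq must precede its parent on the leaf list, so walks over the list see children first. The head-or-tail decision can be sketched in isolation with an array standing in for the RCU list (illustrative only):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAXN 8

    struct node {
        const char *name;
        struct node *parent;
        bool on_list;
    };

    static struct node *list[MAXN];
    static int nr;

    static void insert_head(struct node *n)
    {
        memmove(&list[1], &list[0], nr * sizeof(list[0]));
        list[0] = n;
    }

    /* Mirror the two cases: head if the parent is already listed, else tail. */
    static void add_leaf(struct node *n)
    {
        if (n->on_list)
            return;
        if (n->parent && n->parent->on_list)
            insert_head(n);     /* we end up in front of the parent */
        else
            list[nr] = n;       /* parent enqueued later, behind us */
        nr++;
        n->on_list = true;
    }

    int main(void)
    {
        struct node root = { "root", NULL, false };
        struct node child = { "child", &root, false };
        struct node late = { "late-child", &root, false };

        add_leaf(&child);   /* bottom-up enqueue: child first, then root */
        add_leaf(&root);
        add_leaf(&late);    /* parent already listed: goes to the head */
        for (int i = 0; i < nr; i++)
            printf("%s\n", list[i]->name);  /* late-child, child, root */
        return 0;
    }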
 /* Iterate through all leaf cfs_rq's on a runqueue */
 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
        list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
        return &cpu_rq(this_cpu)->cfs;
 }
 
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
                for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
 
@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
        WRT_SYSCTL(sched_min_granularity);
        WRT_SYSCTL(sched_latency);
        WRT_SYSCTL(sched_wakeup_granularity);
-       WRT_SYSCTL(sched_shares_ratelimit);
 #undef WRT_SYSCTL
 
        return 0;
@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
        return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
+static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -514,6 +561,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
        curr->vruntime += delta_exec_weighted;
        update_min_vruntime(cfs_rq);
+
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+       cfs_rq->load_unacc_exec_time += delta_exec;
+#endif
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -633,7 +684,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
                list_add(&se->group_node, &cfs_rq->tasks);
        }
        cfs_rq->nr_running++;
-       se->on_rq = 1;
 }
 
 static void
@@ -647,9 +697,140 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
                list_del_init(&se->group_node);
        }
        cfs_rq->nr_running--;
-       se->on_rq = 0;
 }
 
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
+                                           int global_update)
+{
+       struct task_group *tg = cfs_rq->tg;
+       long load_avg;
+
+       load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period + 1);
+       load_avg -= cfs_rq->load_contribution;
+
+       if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
+               atomic_add(load_avg, &tg->load_weight);
+               cfs_rq->load_contribution += load_avg;
+       }
+}
+
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
+{
+       u64 period = sysctl_sched_shares_window;
+       u64 now, delta;
+       unsigned long load;
+
+       if (!cfs_rq)
+               return;
+
+       load = cfs_rq->load.weight;
+       now = rq_of(cfs_rq)->clock;
+       delta = now - cfs_rq->load_stamp;
+
+       /* truncate load history at 4 idle periods */
+       if (cfs_rq->load_stamp > cfs_rq->load_last &&
+           now - cfs_rq->load_last > 4 * period) {
+               cfs_rq->load_period = 0;
+               cfs_rq->load_avg = 0;
+       }
+
+       cfs_rq->load_stamp = now;
+       cfs_rq->load_unacc_exec_time = 0;
+       cfs_rq->load_period += delta;
+       if (load) {
+               cfs_rq->load_last = now;
+               cfs_rq->load_avg += delta * load;
+       }
+
+       /* consider updating load contribution on each fold or truncate */
+       if (global_update || cfs_rq->load_period > period
+           || !cfs_rq->load_period)
+               update_cfs_rq_load_contribution(cfs_rq, global_update);
+
+       while (cfs_rq->load_period > period) {
+               /*
+                * Inline assembly required to prevent the compiler
+                * optimising this loop into a divmod call.
+                * See __iter_div_u64_rem() for another example of this.
+                */
+               asm("" : "+rm" (cfs_rq->load_period));
+               cfs_rq->load_period /= 2;
+               cfs_rq->load_avg /= 2;
+       }
+
+       if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
+               list_del_leaf_cfs_rq(cfs_rq);
+}
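
update_cfs_load() bounds its history by halving load_period and load_avg together until the period fits inside one window, which preserves their ratio; the empty asm only keeps GCC from fusing the two divisions into a slower divmod call. A standalone sketch of the folding, with hypothetical numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t window = 10000000ULL;          /* ~10ms, hypothetical */
            uint64_t load_period = 45000000ULL;     /* accumulated time */
            uint64_t load_avg = 90000000ULL;        /* weight * time */

            /* Halve both until the period fits: the ratio is preserved. */
            while (load_period > window) {
                    load_period /= 2;
                    load_avg /= 2;
            }
            printf("period=%llu avg=%llu avg/period=%llu\n",
                   (unsigned long long)load_period,
                   (unsigned long long)load_avg,
                   (unsigned long long)(load_avg / (load_period + 1)));
            return 0;
    }
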
+
+static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+                           unsigned long weight)
+{
+       if (se->on_rq) {
+               /* commit outstanding execution time */
+               if (cfs_rq->curr == se)
+                       update_curr(cfs_rq);
+               account_entity_dequeue(cfs_rq, se);
+       }
+
+       update_load_set(&se->load, weight);
+
+       if (se->on_rq)
+               account_entity_enqueue(cfs_rq, se);
+}
+
+static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+{
+       struct task_group *tg;
+       struct sched_entity *se;
+       long load_weight, load, shares;
+
+       if (!cfs_rq)
+               return;
+
+       tg = cfs_rq->tg;
+       se = tg->se[cpu_of(rq_of(cfs_rq))];
+       if (!se)
+               return;
+
+       load = cfs_rq->load.weight + weight_delta;
+
+       load_weight = atomic_read(&tg->load_weight);
+       load_weight -= cfs_rq->load_contribution;
+       load_weight += load;
+
+       shares = (tg->shares * load);
+       if (load_weight)
+               shares /= load_weight;
+
+       if (shares < MIN_SHARES)
+               shares = MIN_SHARES;
+       if (shares > tg->shares)
+               shares = tg->shares;
+
+       reweight_entity(cfs_rq_of(se), se, shares);
+}
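
The net effect: this cpu's group entity receives a slice of tg->shares proportional to its share of the group's total weight, clamped between MIN_SHARES and the full allotment. The arithmetic in isolation (all values hypothetical):

    #include <stdio.h>

    #define MIN_SHARES 2    /* hypothetical floor */

    static long calc_shares(long tg_shares, long load, long total_weight)
    {
            long shares = tg_shares * load;

            if (total_weight)
                    shares /= total_weight;
            if (shares < MIN_SHARES)
                    shares = MIN_SHARES;
            if (shares > tg_shares)
                    shares = tg_shares;
            return shares;
    }

    int main(void)
    {
            /* 1024 group shares; this cpu holds 300 of 1200 total weight */
            printf("shares = %ld\n", calc_shares(1024, 300, 1200)); /* 256 */
            return 0;
    }
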
+
+static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
+{
+}
+
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+{
+}
+
+static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -771,6 +952,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
+       update_cfs_load(cfs_rq, 0);
+       update_cfs_shares(cfs_rq, se->load.weight);
        account_entity_enqueue(cfs_rq, se);
 
        if (flags & ENQUEUE_WAKEUP) {
@@ -782,6 +965,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
        check_spread(cfs_rq, se);
        if (se != cfs_rq->curr)
                __enqueue_entity(cfs_rq, se);
+       se->on_rq = 1;
+
+       if (cfs_rq->nr_running == 1)
+               list_add_leaf_cfs_rq(cfs_rq);
 }
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -825,8 +1012,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
        if (se != cfs_rq->curr)
                __dequeue_entity(cfs_rq, se);
+       se->on_rq = 0;
+       update_cfs_load(cfs_rq, 0);
        account_entity_dequeue(cfs_rq, se);
        update_min_vruntime(cfs_rq);
+       update_cfs_shares(cfs_rq, 0);
 
        /*
         * Normalize the entity after updating the min_vruntime because the
@@ -955,6 +1145,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
         */
        update_curr(cfs_rq);
 
+       /*
+        * Update share accounting for long-running entities.
+        */
+       update_entity_shares_tick(cfs_rq);
+
 #ifdef CONFIG_SCHED_HRTICK
        /*
         * queued ticks are scheduled to match the slice, so don't bother
@@ -1055,6 +1250,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                flags = ENQUEUE_WAKEUP;
        }
 
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+
        hrtick_update(rq);
 }
 
@@ -1071,12 +1273,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                dequeue_entity(cfs_rq, se, flags);
+
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight)
                        break;
                flags |= DEQUEUE_SLEEP;
        }
 
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+
        hrtick_update(rq);
 }
 
@@ -1143,51 +1353,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
- *
- * The problem is that perfectly aligning the shares is rather expensive, hence
- * we try to avoid doing that too often - see update_shares(), which ratelimits
- * this change.
- *
- * We compensate this by not only taking the current delta into account, but
- * also considering the delta between when the shares were last adjusted and
- * now.
- *
- * We still saw a performance dip, some tracing learned us that between
- * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
- * significantly. Therefore try to bias the error in direction of failing
- * the affine wakeup.
- *
  */
-static long effective_load(struct task_group *tg, int cpu,
-               long wl, long wg)
+static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
        struct sched_entity *se = tg->se[cpu];
 
        if (!tg->parent)
                return wl;
 
-       /*
-        * By not taking the decrease of shares on the other cpu into
-        * account our error leans towards reducing the affine wakeups.
-        */
-       if (!wl && sched_feat(ASYM_EFF_LOAD))
-               return wl;
-
        for_each_sched_entity(se) {
                long S, rw, s, a, b;
-               long more_w;
-
-               /*
-                * Instead of using this increment, also add the difference
-                * between when the shares were last updated and now.
-                */
-               more_w = se->my_q->load.weight - se->my_q->rq_weight;
-               wl += more_w;
-               wg += more_w;
 
                S = se->my_q->tg->shares;
-               s = se->my_q->shares;
-               rw = se->my_q->rq_weight;
+               s = se->load.weight;
+               rw = se->my_q->load.weight;
 
                a = S*(rw + wl);
                b = S*rw + s*wg;
@@ -1508,23 +1687,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
                        sd = tmp;
        }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       if (sched_feat(LB_SHARES_UPDATE)) {
-               /*
-                * Pick the largest domain to update shares over
-                */
-               tmp = sd;
-               if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
-                       tmp = affine_sd;
-
-               if (tmp) {
-                       raw_spin_unlock(&rq->lock);
-                       update_shares(tmp);
-                       raw_spin_lock(&rq->lock);
-               }
-       }
-#endif
-
        if (affine_sd) {
                if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
                        return select_idle_sibling(p, cpu);
@@ -1909,6 +2071,48 @@ out:
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static int update_shares_cpu(struct task_group *tg, int cpu)
+{
+       struct cfs_rq *cfs_rq;
+       unsigned long flags;
+       struct rq *rq;
+
+       if (!tg->se[cpu])
+               return 0;
+
+       rq = cpu_rq(cpu);
+       cfs_rq = tg->cfs_rq[cpu];
+
+       raw_spin_lock_irqsave(&rq->lock, flags);
+
+       update_rq_clock(rq);
+       update_cfs_load(cfs_rq, 1);
+
+       /*
+        * We need to update shares after updating tg->load_weight in
+        * order to adjust the weight of groups with long running tasks.
+        */
+       update_cfs_shares(cfs_rq, 0);
+
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+       return 0;
+}
+
+static void update_shares(int cpu)
+{
+       struct cfs_rq *cfs_rq;
+       struct rq *rq = cpu_rq(cpu);
+
+       rcu_read_lock();
+       for_each_leaf_cfs_rq(rq, cfs_rq)
+               update_shares_cpu(cfs_rq->tg, cpu);
+       rcu_read_unlock();
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
@@ -1956,6 +2160,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
        return max_load_move - rem_load_move;
 }
 #else
+static inline void update_shares(int cpu)
+{
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
@@ -3032,7 +3240,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        schedstat_inc(sd, lb_count[idle]);
 
 redo:
-       update_shares(sd);
        group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
                                   cpus, balance);
 
@@ -3174,8 +3381,6 @@ out_one_pinned:
        else
                ld_moved = 0;
 out:
-       if (ld_moved)
-               update_shares(sd);
        return ld_moved;
 }
 
@@ -3199,6 +3404,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         */
        raw_spin_unlock(&this_rq->lock);
 
+       update_shares(this_cpu);
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
                int balance = 1;
@@ -3569,6 +3775,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
        int update_next_balance = 0;
        int need_serialize;
 
+       update_shares(cpu);
+
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
                        continue;
index 185f920ec1a2e923b0d966f787c610ff26a7b6cb..68e69acc29b9570b10ecf8a5892ea62c48700c5c 100644 (file)
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
 SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(LB_BIAS, 1)
-SCHED_FEAT(LB_SHARES_UPDATE, 1)
-SCHED_FEAT(ASYM_EFF_LOAD, 1)
 
 /*
  * Spin-wait on mutex acquisition when the mutex owner is running on
index bea7d79f7e9ca958bba514cbd8eb48ceab47bab3..c914ec747ca6709e25a177eb3c4152c75cb40aee 100644 (file)
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_add_rcu(&rt_rq->leaf_rt_rq_list,
+                       &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
        list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
 
@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
        return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
        for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
 
@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                return;
 
+       if (!rt_rq->rt_nr_running)
+               list_add_leaf_rt_rq(rt_rq);
+
        if (head)
                list_add(&rt_se->run_list, queue);
        else
@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
                __clear_bit(rt_se_prio(rt_se), array->bitmap);
 
        dec_rt_tasks(rt_se, rt_rq);
+       if (!rt_rq->rt_nr_running)
+               list_del_leaf_rt_rq(rt_rq);
 }
 
 /*
index 18f4be0d5fe0bbf853935972d9b441e95bc61c5a..d4d918a91881407acd8abbde6691f77197cd013c 100644 (file)
@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
                             cpumask_any(cpu_online_mask));
        case CPU_DEAD:
        case CPU_DEAD_FROZEN: {
-               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+               static struct sched_param param = {
+                       .sched_priority = MAX_RT_PRIO-1
+               };
 
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
index c71e075005368eceff3aab4340f94beca4aee249..98d8c1e80edbcb106ba8e87c34777459aa4eff55 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 #include <linux/srcu.h>
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
@@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
         * all srcu_read_lock() calls using the old counters have completed.
         * Their corresponding critical sections might well be still
         * executing, but the srcu_read_lock() primitives themselves
-        * will have finished executing.
+        * will have finished executing.  We initially give readers
+        * an arbitrarily chosen 10 microseconds to get out of their
+        * SRCU read-side critical sections, then loop waiting 1/HZ
+        * seconds per iteration.
         */
 
+       if (srcu_readers_active_idx(sp, idx))
+               udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
        while (srcu_readers_active_idx(sp, idx))
                schedule_timeout_interruptible(1);
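
The change front-loads one short delay on the theory that most SRCU readers exit their critical sections almost immediately, reserving the coarse 1/HZ sleeps for the rare slow reader. The same two-phase wait in userspace (the predicate and delay values are hypothetical stand-ins):

    #include <stdio.h>
    #include <unistd.h>

    /* Hypothetical stand-in for srcu_readers_active_idx(). */
    static int readers_active(void)
    {
            static int remaining = 3;
            return remaining-- > 0;
    }

    int main(void)
    {
            /* Phase 1: one short delay covers the common fast-reader case. */
            if (readers_active())
                    usleep(10);             /* ~10us, like the SRCU udelay */

            /* Phase 2: rare slow readers get coarse polling instead. */
            while (readers_active())
                    usleep(10000);          /* ~1/HZ-scale sleep */

            puts("grace period complete");
            return 0;
    }
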
 
index 7f5a0cd296a96ca44e43f0db028026094dbbb57a..2745dcdb6c6c5756a7bafd19e6497c31cc077d60 100644 (file)
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
        err = session;
 out:
        write_unlock_irq(&tasklist_lock);
-       if (err > 0)
+       if (err > 0) {
                proc_sid_connector(group_leader);
+               sched_autogroup_create_attach(group_leader);
+       }
        return err;
 }
 
index 5abfa151855493735a91fd45a255a45727c8ba97..ae5cbb1e3ced15b8cc2e00b052496953709cf4c2 100644 (file)
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns;                       /* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;   /* 1 second */
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-static int min_sched_shares_ratelimit = 100000; /* 100 usec */
-static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
 #endif
 
 #ifdef CONFIG_COMPACTION
@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &min_wakeup_granularity_ns,
                .extra2         = &max_wakeup_granularity_ns,
        },
-       {
-               .procname       = "sched_shares_ratelimit",
-               .data           = &sysctl_sched_shares_ratelimit,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = sched_proc_update_handler,
-               .extra1         = &min_sched_shares_ratelimit,
-               .extra2         = &max_sched_shares_ratelimit,
-       },
        {
                .procname       = "sched_tunable_scaling",
                .data           = &sysctl_sched_tunable_scaling,
@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &min_sched_tunable_scaling,
                .extra2         = &max_sched_tunable_scaling,
        },
-       {
-               .procname       = "sched_shares_thresh",
-               .data           = &sysctl_sched_shares_thresh,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
        {
                .procname       = "sched_migration_cost",
                .data           = &sysctl_sched_migration_cost,
@@ -351,6 +332,13 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "sched_shares_window",
+               .data           = &sysctl_sched_shares_window,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "timer_migration",
                .data           = &sysctl_timer_migration,
@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+#ifdef CONFIG_SCHED_AUTOGROUP
+       {
+               .procname       = "sched_autogroup_enabled",
+               .data           = &sysctl_sched_autogroup_enabled,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
        {
                .procname       = "prove_locking",
@@ -745,21 +744,21 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
        {
-               .procname       = "unknown_nmi_panic",
-               .data           = &unknown_nmi_panic,
+               .procname       = "nmi_watchdog",
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dowatchdog_enabled,
        },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
        {
-               .procname       = "nmi_watchdog",
-               .data           = &nmi_watchdog_enabled,
+               .procname       = "unknown_nmi_panic",
+               .data           = &unknown_nmi_panic,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_nmi_enabled,
+               .proc_handler   = proc_dointvec,
        },
 #endif
 #if defined(CONFIG_X86)
index 1357c5786064e6c8f030defbbb7f76f690dc3c15..4b2545a136ffcec72d5073b579cf3f41c49fcbd3 100644 (file)
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
        { CTL_INT,      KERN_IA64_UNALIGNED,            "ignore-unaligned-usertrap" },
        { CTL_INT,      KERN_COMPAT_LOG,                "compat-log" },
        { CTL_INT,      KERN_MAX_LOCK_DEPTH,            "max_lock_depth" },
-       { CTL_INT,      KERN_NMI_WATCHDOG,              "nmi_watchdog" },
        { CTL_INT,      KERN_PANIC_ON_NMI,              "panic_on_unrecovered_nmi" },
        {}
 };
index c8231fb1570831d78215ab9967958a1663424ed0..3308fd7f1b52f170e4da5dd7e26749aca652ef1b 100644 (file)
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
        return ret;
 }
 
+#ifdef CONFIG_IA64
+#define TASKSTATS_NEEDS_PADDING 1
+#endif
+
 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
        struct nlattr *na, *ret;
        int aggr;
 
-       /* If we don't pad, we end up with alignment on a 4 byte boundary.
-        * This causes lots of runtime warnings on systems requiring 8 byte
-        * alignment */
-       u32 pids[2] = { pid, 0 };
-       int pid_size = ALIGN(sizeof(pid), sizeof(long));
-
        aggr = (type == TASKSTATS_TYPE_PID)
                        ? TASKSTATS_TYPE_AGGR_PID
                        : TASKSTATS_TYPE_AGGR_TGID;
 
+       /*
+        * The taskstats structure is internally aligned on 8 byte
+        * boundaries but the layout of the aggregate reply, with
+        * two NLA headers and the pid (each 4 bytes), actually
+        * forces the entire structure to be unaligned. This causes
+        * the kernel to issue unaligned access warnings on some
+        * architectures like ia64. Unfortunately, some software out there
+        * doesn't properly unroll the NLA packet and assumes that the start
+        * of the taskstats structure will always be 20 bytes from the start
+        * of the netlink payload. Aligning the start of the taskstats
+        * structure breaks this software, which we don't want. So, for now
+        * the alignment only happens on architectures that require it
+        * and those users will have to update to fixed versions of those
+        * packages. Space is reserved in the packet only when needed.
+        * This ifdef should be removed in several years, e.g. 2012, once
+        * we can be confident that fixed versions are installed on most
+        * systems. We add the padding before the aggregate since the
+        * aggregate is already a defined type.
+        */
+#ifdef TASKSTATS_NEEDS_PADDING
+       if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
+               goto err;
+#endif
        na = nla_nest_start(skb, aggr);
        if (!na)
                goto err;
-       if (nla_put(skb, type, pid_size, pids) < 0)
+
+       if (nla_put(skb, type, sizeof(pid), &pid) < 0)
                goto err;
        ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
        if (!ret)
@@ -456,6 +478,18 @@ out:
        return rc;
 }
 
+static size_t taskstats_packet_size(void)
+{
+       size_t size;
+
+       size = nla_total_size(sizeof(u32)) +
+               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+#ifdef TASKSTATS_NEEDS_PADDING
+       size += nla_total_size(0); /* Padding for alignment */
+#endif
+       return size;
+}
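
taskstats_packet_size() is just the sum of the aligned on-wire sizes of the attributes, plus one zero-length attribute when the architecture needs the payload shifted for 8-byte alignment. A sketch of the same arithmetic under the standard netlink alignment rules (the taskstats size is hypothetical):

    #include <stdio.h>

    #define NLA_ALIGNTO     4
    #define NLA_ALIGN(len)  (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
    #define NLA_HDRLEN      4       /* sizeof(struct nlattr) */

    /* Mirrors nla_total_size(): attribute header plus aligned payload. */
    static int nla_total_size(int payload)
    {
            return NLA_ALIGN(NLA_HDRLEN + payload);
    }

    int main(void)
    {
            int stats_size = 328;   /* hypothetical sizeof(struct taskstats) */
            int size;

            size = nla_total_size(sizeof(unsigned int)) /* pid/tgid attr  */
                 + nla_total_size(stats_size)           /* stats attr     */
                 + nla_total_size(0);                   /* aggregate nest */
    #ifdef NEEDS_PADDING
            size += nla_total_size(0);  /* empty pad attr: 4 more bytes */
    #endif
            printf("reply needs %d bytes\n", size);
            return 0;
    }
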
+
 static int cmd_attr_pid(struct genl_info *info)
 {
        struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
        u32 pid;
        int rc;
 
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
        if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
        u32 tgid;
        int rc;
 
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
        if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
        /*
         * Size includes space for nested attributes
         */
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        is_thread_group = !!taskstats_tgid_alloc(tsk);
        if (is_thread_group) {
index ac38fbb176ccd0bb598b1eaaa7f2a703b17ec565..a9ae369925ce14fa4cf7ca1674d9f4903a6f4a24 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/math64.h>
+#include <linux/kernel.h>
 
 /*
  * fixed point arithmetic scale factor for skew
@@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync,
        int index;
        int num_samples = sync->num_samples;
 
-       if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
+       if (num_samples > ARRAY_SIZE(buffer)) {
                samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
                if (!samples) {
                        samples = buffer;
-                       num_samples = sizeof(buffer)/sizeof(buffer[0]);
+                       num_samples = ARRAY_SIZE(buffer);
                }
        } else {
                samples = buffer;
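
ARRAY_SIZE() is the kernel's standard spelling of the element count that this hunk had open-coded; note it is only meaningful on true arrays, never on pointers. Minimal illustration:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    int main(void)
    {
            long buffer[64];

            printf("%zu samples fit\n", ARRAY_SIZE(buffer));    /* 64 */
            /* Caution: applied to a pointer this silently yields
             * sizeof(long *) / sizeof(long), not an element count. */
            return 0;
    }
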
index 49010d822f725b47726742fa7e1b45aad076ef90..5bb86da8200373a2e6cd64fdcbe0355f43f5a27f 100644 (file)
@@ -32,6 +32,8 @@ struct timekeeper {
        cycle_t cycle_interval;
        /* Number of clock shifted nano seconds in one NTP interval. */
        u64     xtime_interval;
+       /* shifted nano seconds left over when rounding cycle_interval */
+       s64     xtime_remainder;
        /* Raw nano seconds accumulated per NTP interval. */
        u32     raw_interval;
 
@@ -62,7 +64,7 @@ struct timekeeper timekeeper;
 static void timekeeper_setup_internals(struct clocksource *clock)
 {
        cycle_t interval;
-       u64 tmp;
+       u64 tmp, ntpinterval;
 
        timekeeper.clock = clock;
        clock->cycle_last = clock->read(clock);
@@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
        tmp <<= clock->shift;
+       ntpinterval = tmp;
        tmp += clock->mult/2;
        do_div(tmp, clock->mult);
        if (tmp == 0)
@@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
 
        /* Go back from cycles -> shifted ns */
        timekeeper.xtime_interval = (u64) interval * clock->mult;
+       timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval;
        timekeeper.raw_interval =
                ((u64) interval * clock->mult) >> clock->shift;
 
@@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
 
        /* Accumulate error between NTP and clock interval */
        timekeeper.ntp_error += tick_length << shift;
-       timekeeper.ntp_error -= timekeeper.xtime_interval <<
+       timekeeper.ntp_error -=
+           (timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
                                (timekeeper.ntp_error_shift + shift);
 
        return offset;
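
The new xtime_remainder field exists because rounding the NTP interval to whole clocksource cycles loses a few shifted nanoseconds each tick; feeding them into ntp_error turns invisible drift into correctable error. The rounding step in isolation (clocksource parameters hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Hypothetical clocksource: mult converts cycles to shifted ns. */
            uint64_t ntpinterval = 10000000ULL << 8;    /* shifted ns */
            uint64_t mult = 3579;

            /* Round to the nearest whole cycle, as the setup code does. */
            uint64_t cycles = (ntpinterval + mult / 2) / mult;
            uint64_t xtime_interval = cycles * mult;
            int64_t  remainder = (int64_t)(ntpinterval - xtime_interval);

            printf("cycles=%llu remainder=%lld shifted-ns\n",
                   (unsigned long long)cycles, (long long)remainder);
            return 0;
    }
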
index ab8f5e33fa92c76db813d1419e6a339f3a7aca52..32a19f9397fc347c3144a01e142308c026f49c70 100644 (file)
@@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
 {
        struct hrtimer *timer, tmp;
        unsigned long next = 0, i;
-       struct rb_node *curr;
+       struct timerqueue_node *curr;
        unsigned long flags;
 
 next_one:
        i = 0;
        raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
-       curr = base->first;
+       curr = timerqueue_getnext(&base->active);
        /*
         * Crude but we have to do this O(N*N) thing, because
         * we have to unlock the base when printing:
         */
        while (curr && i < next) {
-               curr = rb_next(curr);
+               curr = timerqueue_iterate_next(curr);
                i++;
        }
 
        if (curr) {
 
-               timer = rb_entry(curr, struct hrtimer, node);
+               timer = container_of(curr, struct hrtimer, node);
                tmp = *timer;
                raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
index 353b9227c2ecfe11793a17b0a41f534ebdbd14f8..43ca9936f2d06a2cba572f1c877406f9281e68a3 100644 (file)
@@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
-/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB to
- * indicate whether the timer is deferrable.
- *
- * A deferrable timer will work normally when the system is busy, but
- * will not cause a CPU to come out of idle just to service it; instead,
- * the timer will be serviced when the CPU eventually wakes up with a
- * subsequent non-deferrable timer.
- */
-#define TBASE_DEFERRABLE_FLAG          (0x1)
-
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
@@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-       timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
-                                      TBASE_DEFERRABLE_FLAG));
+       timer->base = TBASE_MAKE_DEFERRED(timer->base);
 }
 
 static inline void
@@ -343,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-
-static inline void set_running_timer(struct tvec_base *base,
-                                       struct timer_list *timer)
-{
-#ifdef CONFIG_SMP
-       base->running_timer = timer;
-#endif
-}
-
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
        unsigned long expires = timer->expires;
@@ -936,15 +914,12 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
-#ifdef CONFIG_SMP
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer do del
  *
  * This function tries to deactivate a timer. Upon successful (ret >= 0)
  * exit the timer is not queued and the handler is not running on any CPU.
- *
- * It must not be called from interrupt contexts.
  */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
@@ -973,6 +948,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
+#ifdef CONFIG_SMP
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -983,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  *
  * Synchronization rules: Callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which would prevent
+ * hardirq contexts. The caller must not hold locks which would prevent
  * completion of the timer's handler. The timer's handler must not call
  * add_timer_on(). Upon exit the timer is not queued and the handler is
  * not running on any CPU.
@@ -993,14 +969,16 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
 int del_timer_sync(struct timer_list *timer)
 {
 #ifdef CONFIG_LOCKDEP
-       unsigned long flags;
-
-       local_irq_save(flags);
+       local_bh_disable();
        lock_map_acquire(&timer->lockdep_map);
        lock_map_release(&timer->lockdep_map);
-       local_irq_restore(flags);
+       local_bh_enable();
 #endif
-
+       /*
+        * Don't use this in hardirq context, because it
+        * could lead to deadlock.
+        */
+       WARN_ON(in_irq());
        for (;;) {
                int ret = try_to_del_timer_sync(timer);
                if (ret >= 0)
@@ -1111,7 +1089,7 @@ static inline void __run_timers(struct tvec_base *base)
 
                        timer_stats_account_timer(timer);
 
-                       set_running_timer(base, timer);
+                       base->running_timer = timer;
                        detach_timer(timer, 1);
 
                        spin_unlock_irq(&base->lock);
@@ -1119,7 +1097,7 @@ static inline void __run_timers(struct tvec_base *base)
                        spin_lock_irq(&base->lock);
                }
        }
-       set_running_timer(base, NULL);
+       base->running_timer = NULL;
        spin_unlock_irq(&base->lock);
 }
 
@@ -1249,7 +1227,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
  */
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
-       struct tvec_base *base = __get_cpu_var(tvec_bases);
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
        unsigned long expires;
 
        /*
@@ -1298,7 +1276,7 @@ void update_process_times(int user_tick)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-       struct tvec_base *base = __get_cpu_var(tvec_bases);
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
 
        hrtimer_run_pending();
 
index ea37e2ff416429d04cea3829e835ced7f0f35fc0..14674dce77a6c5a9cb8f17ffe2056f8e3ece1cc2 100644 (file)
@@ -69,6 +69,21 @@ config EVENT_TRACING
        select CONTEXT_SWITCH_TRACER
        bool
 
+config EVENT_POWER_TRACING_DEPRECATED
+       depends on EVENT_TRACING
+       bool "Deprecated power event trace API, to be removed"
+       default y
+       help
+         Provides old power event types:
+         C-state/idle accounting events:
+         power:power_start
+         power:power_end
+         and old cpufreq accounting event:
+         power:power_frequency
+         This is for userspace compatibility
+         and will vanish after 5 kernel iterations,
+         namely 2.6.41.
+
 config CONTEXT_SWITCH_TRACER
        bool
 
index a22582a061618cea52acee544d9e783ef1d9868e..f55fcf61b223d87e547568512ef2ae89e17107c5 100644 (file)
@@ -13,5 +13,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
+#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED
+EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
+#endif
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
 
index 9ed509a015d81a69cd452b3697658dd7fde9cb71..bd1c35a4fbccf31c0531f0667545011f704c561f 100644 (file)
@@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 
                /* Need to copy one event at a time */
                do {
+                       /*
+                        * We need the size of one event, because
+                        * rb_advance_reader only advances by one event,
+                        * whereas rb_event_ts_length may include the size of
+                        * one or two events.
+                        * We have already ensured there's enough space if this
+                        * is a time extend.
+                        */
+                       size = rb_event_length(event);
                        memcpy(bpage->data + pos, rpage->data + rpos, size);
 
                        len -= size;
@@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                        event = rb_reader_event(cpu_buffer);
                        /* Always keep the time extend and data together */
                        size = rb_event_ts_length(event);
-               } while (len > size);
+               } while (len >= size);
 
                /* update bpage */
                local_set(&bpage->commit, pos);
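
The `>` to `>=` change matters precisely when the final event fills the remaining length exactly: the strict comparison made the copy loop stop one event early. A toy reproduction of the loop shape (event sizes hypothetical):

    #include <stdio.h>

    int main(void)
    {
            int sizes[] = { 16, 16, 32 };   /* hypothetical event lengths */
            int len = 64;                   /* fits all three exactly */
            int i = 0, copied = 0, size;

            do {
                    size = sizes[i];
                    copied += size;         /* stands in for the memcpy */
                    len -= size;
                    if (++i == 3)
                            break;
                    size = sizes[i];        /* length of the next event */
                    /* "len >= size" admits an exactly-fitting event; the
                     * old "len > size" dropped the final 32 bytes here. */
            } while (len >= size);

            printf("copied %d of 64 bytes\n", copied);
            return 0;
    }
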
index 39c059ca670e64156e6681782ffa708c6b8d720f..19a359d5e6d58573cc1c74326e488a419b01b342 100644 (file)
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
 /* Count the events in use (per event id, not per instance) */
 static int     total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+                                struct perf_event *p_event)
+{
+       /* No tracing, just counting, so no obvious leak */
+       if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+               return 0;
+
+       /* Some events are ok to be traced by non-root users... */
+       if (p_event->attach_state == PERF_ATTACH_TASK) {
+               if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+                       return 0;
+       }
+
+       /*
+        * ...otherwise raw tracepoint data can be a severe data leak,
+        * only allow root to have these.
+        */
+       if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return 0;
+}
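
The permission check runs from cheapest acceptance to hardest refusal: pure counting leaks nothing, per-task attachment may be explicitly whitelisted via TRACE_EVENT_FL_CAP_ANY, and only then does raw tracepoint data demand privilege. The same gating shape reduced to userspace (flags and the privilege test are hypothetical stand-ins):

    #include <stdio.h>
    #include <stdbool.h>

    #define SAMPLE_RAW   0x1    /* stand-in for PERF_SAMPLE_RAW        */
    #define ATTACH_TASK  0x2    /* stand-in for PERF_ATTACH_TASK       */
    #define FL_CAP_ANY   0x4    /* stand-in for TRACE_EVENT_FL_CAP_ANY */

    /* Cheap accepts first, the refusal last. */
    static int event_perm(int attr, int ev_flags, bool is_admin)
    {
            if (!(attr & SAMPLE_RAW))
                    return 0;       /* counting only: no data leak */
            if ((attr & ATTACH_TASK) && (ev_flags & FL_CAP_ANY))
                    return 0;       /* explicitly whitelisted      */
            if (!is_admin)
                    return -1;      /* raw data: privileged only   */
            return 0;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   event_perm(0, 0, false),
                   event_perm(SAMPLE_RAW | ATTACH_TASK, FL_CAP_ANY, false),
                   event_perm(SAMPLE_RAW, 0, false)); /* prints: 0 0 -1 */
            return 0;
    }
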
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
 {
        struct hlist_head __percpu *list;
-       int ret = -ENOMEM;
+       int ret;
        int cpu;
 
+       ret = perf_trace_event_perm(tp_event, p_event);
+       if (ret)
+               return ret;
+
        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;
 
+       ret = -ENOMEM;
+
        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;
index 0725eeab1937ef24a301f2c0b0404f64ce95e026..35fde09b81dee7f386c111766b1fdbefac2d7414 100644 (file)
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
index 4ba44deaac259d05fb67d5d31e8ce5371844a414..4b74d71705c0d2be2a9adf67246823584bd34fcd 100644 (file)
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void)   \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               mutex_lock(&event_storage_mutex);                       \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
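
Because event_storage is one static buffer shared by every expansion of the macro, the snprintf() and the trace_define_field() that consumes the string must sit inside the same critical section. The shared-scratch-buffer pattern reduced to userspace (names hypothetical):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t storage_mutex = PTHREAD_MUTEX_INITIALIZER;
    static char storage[128];       /* one shared scratch buffer */

    /* Format into the buffer and consume it before unlocking. */
    static void define_array_field(const char *type, int len)
    {
            pthread_mutex_lock(&storage_mutex);
            snprintf(storage, sizeof(storage), "%s[%d]", type, len);
            printf("registering field of type %s\n", storage);
            pthread_mutex_unlock(&storage_mutex);
    }

    int main(void)
    {
            define_array_field("char", 16);
            define_array_field("u32", 4);
            return 0;
    }
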
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)                       \
index 155a415b3209c0c4e65936be1b593678aea42d27..562c56e048fdbc34b18dded38cd21ccb3cd87f08 100644 (file)
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
 static int trace_wakeup_test_thread(void *data)
 {
        /* Make this a RT thread, doesn't need to be too high */
-       struct sched_param param = { .sched_priority = 5 };
+       static struct sched_param param = { .sched_priority = 5 };
        struct completion *x = data;
 
        sched_setscheduler(current, SCHED_FIFO, &param);
index 2c7d8d5914b188be65c36a686a81fba7eed07d8c..5c598ca781df4bf6f907043ed1d43f98b3b7ef58 100644 (file)
@@ -158,6 +158,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
                spin_lock_irq(&uidhash_lock);
                up = uid_hash_find(uid, hashent);
                if (up) {
+                       put_user_ns(ns);
                        key_put(new->uid_keyring);
                        key_put(new->session_keyring);
                        kmem_cache_free(uid_cachep, new);
index 6e3c41a4024c1cc66be01218e2c37498498f2469..6e7b575ac33cf2dcba3f9dc749f7039e6805a3f0 100644 (file)
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
 {
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
+       else if (!strncmp(str, "0", 1))
+               no_watchdog = 1;
        return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -307,7 +309,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  */
 static int watchdog(void *unused)
 {
-       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+       static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
        sched_setscheduler(current, SCHED_FIFO, &param);
@@ -364,7 +366,8 @@ static int watchdog_nmi_enable(int cpu)
                goto out_save;
        }
 
-       printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+       printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
+              cpu, PTR_ERR(event));
        return PTR_ERR(event);
 
        /* success path */
@@ -547,13 +550,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
        if (no_watchdog)
-               return 0;
+               return;
 
        err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        WARN_ON(notifier_to_errno(err));
@@ -561,6 +564,5 @@ static int __init spawn_watchdog_task(void)
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
 
-       return 0;
+       return;
 }
-early_initcall(spawn_watchdog_task);
index 28b42b9274d0b5fe47522d9df8158498be12319a..2d05adb984018776610f573de126f14bda4c9d2e 100644 (file)
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
          An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+                !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
index e6a3763b82126729ecad6636caec9686cd7dec5f..9e2db72d128e6f22ec560ff7f04584777c293dd7 100644 (file)
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-        rbtree.o radix-tree.o dump_stack.o \
+        rbtree.o radix-tree.o dump_stack.o timerqueue.o \
         idr.o int_sqrt.o extable.o prio_tree.o \
         sha1.o irq_regs.o reciprocal_div.o argv_split.o \
         proportions.o prio_heap.o ratelimit.o show_mem.o \
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644 (file)
index 0000000..e3a1050
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ *  Generic Timer-queue
+ *
+ *  Manages a simple queue of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this library code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+       struct rb_node **p = &head->head.rb_node;
+       struct rb_node *parent = NULL;
+       struct timerqueue_node  *ptr;
+
+       /* Make sure we don't add nodes that are already added */
+       WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+       while (*p) {
+               parent = *p;
+               ptr = rb_entry(parent, struct timerqueue_node, node);
+               if (node->expires.tv64 < ptr->expires.tv64)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&node->node, parent, p);
+       rb_insert_color(&node->node, &head->head);
+
+       if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+               head->next = node;
+}
+EXPORT_SYMBOL_GPL(timerqueue_add);
+
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+       WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+       /* update next pointer */
+       if (head->next == node) {
+               struct rb_node *rbn = rb_next(&node->node);
+
+               head->next = rbn ?
+                       rb_entry(rbn, struct timerqueue_node, node) : NULL;
+       }
+       rb_erase(&node->node, &head->head);
+       RB_CLEAR_NODE(&node->node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_del);
+
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+       struct rb_node *next;
+
+       if (!node)
+               return NULL;
+       next = rb_next(&node->node);
+       if (!next)
+               return NULL;
+       return container_of(next, struct timerqueue_node, node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
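
For orientation, a hedged sketch of how a client might drive the new library, assuming kernel context and the init/getnext helpers that the companion linux/timerqueue.h header (not shown here) is expected to provide; timerqueue_getnext() already appears in the hrtimer hunk above:

    #include <linux/timerqueue.h>
    #include <linux/ktime.h>

    /* Sketch only: enqueue two nodes and walk them in expiry order. */
    static void timerqueue_demo(void)
    {
            struct timerqueue_head head;
            struct timerqueue_node a, b, *cur;

            timerqueue_init_head(&head);    /* assumed helper */
            timerqueue_init(&a);            /* assumed helper */
            timerqueue_init(&b);
            a.expires = ktime_set(0, 1000); /* 1000ns */
            b.expires = ktime_set(0, 500);  /* 500ns: becomes head->next */

            timerqueue_add(&head, &a);
            timerqueue_add(&head, &b);

            for (cur = timerqueue_getnext(&head); cur;
                 cur = timerqueue_iterate_next(cur))
                    ;                       /* visits b, then a */

            timerqueue_del(&head, &b);
            timerqueue_del(&head, &a);
    }
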
index 4d709ee5901370842534224a9f81e7d13943e196..1a8894eadf7275fc2a41f002cadaca4f86684cbf 100644 (file)
@@ -279,7 +279,6 @@ static unsigned long isolate_migratepages(struct zone *zone,
                /* Successfully isolated */
                del_page_from_lru_list(zone, page, page_lru(page));
                list_add(&page->lru, migratelist);
-               mem_cgroup_del_lru(page);
                cc->nr_migratepages++;
 
                /* Avoid isolating too much */
index 7a22b41292115f78dc039ebf5e271090dc5a8f9d..00bb8a64d028f945d50346c2332c6d4b54373621 100644 (file)
@@ -1925,19 +1925,18 @@ again:
 
                rcu_read_lock();
                p = rcu_dereference(mm->owner);
-               VM_BUG_ON(!p);
                /*
-                * because we don't have task_lock(), "p" can exit while
-                * we're here. In that case, "mem" can point to root
-                * cgroup but never be NULL. (and task_struct itself is freed
-                * by RCU, cgroup itself is RCU safe.) Then, we have small
-                * risk here to get wrong cgroup. But such kind of mis-account
-                * by race always happens because we don't have cgroup_mutex().
-                * It's overkill and we allow that small race, here.
+                * Because we don't have task_lock(), "p" can exit.
+                * In that case, "mem" can point to root, or p can be NULL due
+                * to a race with swapoff. Then, we have a small risk of
+                * mis-accounting. But this kind of mis-accounting by race
+                * always happens because we don't have cgroup_mutex(). Taking
+                * it would be overkill, so we allow that small race here.
+                * (*) swapoff et al. will charge against the mm_struct, not
+                * the task_struct. So, mm->owner can be NULL.
                 */
                mem = mem_cgroup_from_task(p);
-               VM_BUG_ON(!mem);
-               if (mem_cgroup_is_root(mem)) {
+               if (!mem || mem_cgroup_is_root(mem)) {
                        rcu_read_unlock();
                        goto done;
                }
index fe5a3c6a54260f2ae9999fbaabe03082a818982f..6ae8a66a704575764ecf068c3f9a02bcc0a14575 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
+#include <asm/tlbflush.h>
+
 #include "internal.h"
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
index 27a9ac58851678d30c7c477d00a857cf2f63bc35..ef4045d010d5f37631c3ad2f9de0e824e1de65a7 100644 (file)
@@ -10,7 +10,7 @@
  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
- *  Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org>
+ *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -328,6 +328,7 @@ void *vmalloc_node(unsigned long size, int node)
 {
        return vmalloc(size);
 }
+EXPORT_SYMBOL(vmalloc_node);
 
 /**
  * vzalloc_node - allocate memory on a specific node with zero fill
@@ -440,6 +441,31 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 {
 }
 
+/**
+ *     alloc_vm_area - allocate a range of kernel address space
+ *     @size:          size of the area
+ *
+ *     Returns:        NULL on failure, vm_struct on success
+ *
+ *     This function reserves a range of kernel address space, and
+ *     allocates pagetables to map that range.  No actual mappings
+ *     are created.  If the kernel address space is not shared
+ *     between processes, it syncs the pagetable across all
+ *     processes.
+ */
+struct vm_struct *alloc_vm_area(size_t size)
+{
+       BUG();
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(alloc_vm_area);
+
+void free_vm_area(struct vm_struct *area)
+{
+       BUG();
+}
+EXPORT_SYMBOL_GPL(free_vm_area);
+
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
                   struct page *page)
 {
index b840afa89761ce0d83690ff963385399246fa6d8..b4edfe7ce06c1bf5f6d692235eac4e07f9068490 100644 (file)
@@ -563,7 +563,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                                break;          /* We've done our duty */
                }
                trace_wbc_balance_dirty_wait(&wbc, bdi);
-               __set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_UNINTERRUPTIBLE);
                io_schedule_timeout(pause);
 
                /*
index efe816856a9d777b284f8bf25cc7548154f374d6..02ba91230b99269f45beea6387c165e042800347 100644 (file)
@@ -1268,7 +1268,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
        /* we're done parsing the input, undefine BUG macro and dump config */
 #undef PCPU_SETUP_BUG_ON
-       pcpu_dump_alloc_info(KERN_INFO, ai);
+       pcpu_dump_alloc_info(KERN_DEBUG, ai);
 
        pcpu_nr_groups = ai->nr_groups;
        pcpu_group_offsets = group_offsets;
index fa642aa652bdba0d4b0b3f47c77dc9e55571c19f..432a9a633e8d8da4ddcabdb1a9a3cef1a55a3253 100644 (file)
@@ -311,6 +311,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
        d->state      = BT_OPEN;
        d->flags      = 0;
        d->mscex      = 0;
+       d->sec_level  = BT_SECURITY_LOW;
        d->mtu        = RFCOMM_DEFAULT_MTU;
        d->v24_sig    = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
 
index eb5b256ffc8801ff7e187c64ee3dde5da268f570..543b3262d002cdca0213348b4878c364622394b1 100644 (file)
@@ -437,7 +437,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        ip6h = ipv6_hdr(skb);
 
        *(__force __be32 *)ip6h = htonl(0x60000000);
-       ip6h->payload_len = 8 + sizeof(*mldq);
+       ip6h->payload_len = htons(8 + sizeof(*mldq));
        ip6h->nexthdr = IPPROTO_HOPOPTS;
        ip6h->hop_limit = 1;
        ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
@@ -1430,7 +1430,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                                 struct net_bridge_port *port,
                                 struct sk_buff *skb)
 {
-       struct sk_buff *skb2 = skb;
+       struct sk_buff *skb2;
        struct ipv6hdr *ip6h;
        struct icmp6hdr *icmp6h;
        u8 nexthdr;
@@ -1469,15 +1469,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        if (!skb2)
                return -ENOMEM;
 
+       err = -EINVAL;
+       if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
+               goto out;
+
        len -= offset - skb_network_offset(skb2);
 
        __skb_pull(skb2, offset);
        skb_reset_transport_header(skb2);
 
-       err = -EINVAL;
-       if (!pskb_may_pull(skb2, sizeof(*icmp6h)))
-               goto out;
-
        icmp6h = icmp6_hdr(skb2);
 
        switch (icmp6h->icmp6_type) {
@@ -1516,7 +1516,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        switch (icmp6h->icmp6_type) {
        case ICMPV6_MGM_REPORT:
            {
-               struct mld_msg *mld = (struct mld_msg *)icmp6h;
+               struct mld_msg *mld;
+               if (!pskb_may_pull(skb2, sizeof(*mld))) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               mld = (struct mld_msg *)skb_transport_header(skb2);
                BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
                err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
                break;
@@ -1529,15 +1534,18 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                break;
        case ICMPV6_MGM_REDUCTION:
            {
-               struct mld_msg *mld = (struct mld_msg *)icmp6h;
+               struct mld_msg *mld;
+               if (!pskb_may_pull(skb2, sizeof(*mld))) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               mld = (struct mld_msg *)skb_transport_header(skb2);
                br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
            }
        }
 
 out:
-       __skb_push(skb2, offset);
-       if (skb2 != skb)
-               kfree_skb(skb2);
+       kfree_skb(skb2);
        return err;
 }
 #endif
index 35cf27087b561d6e9955fd75b4b03213a6e9e8d8..e3d7aefa91811d8945d9283d5de7ede9b168da34 100644 (file)
@@ -50,6 +50,8 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
        llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
 
+       skb_reset_mac_header(skb);
+
        NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
                dev_queue_xmit);
 }
index 6faa8256e10ca22d6fb0b2005d74c5c5580a404d..9d5e8accfab1d73f25e8a00ae45523a400b820ce 100644 (file)
@@ -125,7 +125,7 @@ struct bcm_sock {
        struct list_head tx_ops;
        unsigned long dropped_usr_msgs;
        struct proc_dir_entry *bcm_proc_read;
-       char procname [20]; /* pointer printed in ASCII with \0 */
+       char procname [32]; /* inode number in decimal with \0 */
 };
 
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
@@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 
        if (proc_dir) {
                /* unique socket address as filename */
-               sprintf(bo->procname, "%p", sock);
+               sprintf(bo->procname, "%lu", sock_i_ino(sk));
                bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
                                                     proc_dir,
                                                     &bcm_proc_fops, sk);
index 82a4369ae15091520d99effdcbb36dec7b3bab42..a20e5d3bbfa017db76959cfd448962c134d5d309 100644 (file)
@@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
        int ret = 0;
 
-       if (rule->iifindex && (rule->iifindex != fl->iif) &&
-           !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
+       if (rule->iifindex && (rule->iifindex != fl->iif))
                goto out;
 
        if (rule->oifindex && (rule->oifindex != fl->oif))
index fb6080111461546953b34979db77f5e2d516e060..e5af8d5d5b505d79e3102a0aa879ba3d3e17f82b 100644 (file)
@@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
 #endif
 }
 
+/*
+ * caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of
+ * nulls nodes unmodified. Special care is taken when initializing the
+ * object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+       if (offsetof(struct sock, sk_node.next) != 0)
+               memset(sk, 0, offsetof(struct sock, sk_node.next));
+       memset(&sk->sk_node.pprev, 0,
+              size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+       unsigned long nulls1, nulls2;
+
+       nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+       nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+       if (nulls1 > nulls2)
+               swap(nulls1, nulls2);
+
+       if (nulls1 != 0)
+               memset((char *)sk, 0, nulls1);
+       memset((char *)sk + nulls1 + sizeof(void *), 0,
+              nulls2 - nulls1 - sizeof(void *));
+       memset((char *)sk + nulls2 + sizeof(void *), 0,
+              size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                int family)
 {
@@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                if (!sk)
                        return sk;
                if (priority & __GFP_ZERO) {
-                       /*
-                        * caches using SLAB_DESTROY_BY_RCU should let
-                        * sk_node.next un-modified. Special care is taken
-                        * when initializing object to zero.
-                        */
-                       if (offsetof(struct sock, sk_node.next) != 0)
-                               memset(sk, 0, offsetof(struct sock, sk_node.next));
-                       memset(&sk->sk_node.pprev, 0,
-                              prot->obj_size - offsetof(struct sock,
-                                                        sk_node.pprev));
+                       if (prot->clear_sk)
+                               prot->clear_sk(sk, prot->obj_size);
+                       else
+                               sk_prot_clear_nulls(sk, prot->obj_size);
                }
-       }
-       else
+       } else
                sk = kmalloc(prot->obj_size, priority);
 
        if (sk != NULL) {
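
sk_prot_clear_portaddr_nulls() zeroes a struct sock while preserving two embedded .next pointers whose SLAB_DESTROY_BY_RCU nulls values must survive object reuse: the three memset() calls cover the bytes before the first pointer, between the two, and after the second. A self-contained sketch of the same skip-two-fields clear, with illustrative field names:

    #include <stddef.h>
    #include <string.h>

    struct obj {
            int   a;
            void *keep1;   /* must survive the clear (like skc_node.next) */
            int   b;
            void *keep2;   /* ditto (like skc_portaddr_node.next) */
            int   c;
    };

    /* Zero everything except the two pointer fields.  Assumes
     * offsetof(keep1) < offsetof(keep2), which the kernel code
     * guarantees with swap(). */
    static void clear_except_two(struct obj *o)
    {
            size_t o1 = offsetof(struct obj, keep1);
            size_t o2 = offsetof(struct obj, keep2);

            memset(o, 0, o1);
            memset((char *)o + o1 + sizeof(void *), 0,
                   o2 - o1 - sizeof(void *));
            memset((char *)o + o2 + sizeof(void *), 0,
                   sizeof(*o) - o2 - sizeof(void *));
    }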
index eb6f69a8f27aff4db2de494389dd7d9584b93ece..c19c1f739fbadc48d39a87c25c550f49de100db4 100644 (file)
@@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
                                .daddr = addr
                        }
                },
-               .flags = FLOWI_FLAG_MATCH_ANY_IIF
        };
        struct fib_result res = { 0 };
        struct net_device *dev = NULL;
+       struct fib_table *local_table;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+       res.r = NULL;
+#endif
 
        rcu_read_lock();
-       if (fib_lookup(net, &fl, &res)) {
+       local_table = fib_get_table(net, RT_TABLE_LOCAL);
+       if (!local_table ||
+           fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
                rcu_read_unlock();
                return NULL;
        }
index 987bf9adb31833c19a0db04ce76060306d8e6994..93bfd95584f4656605eb963919de2d74b7c729f0 100644 (file)
@@ -2585,9 +2585,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                        goto out;
 
                /* RACE: Check return value of inet_select_addr instead. */
-               if (rcu_dereference(dev_out->ip_ptr) == NULL)
-                       goto out;       /* Wrong error code */
-
+               if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+                       err = -ENETUNREACH;
+                       goto out;
+               }
                if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
                    ipv4_is_lbcast(oldflp->fl4_dst)) {
                        if (!fl.fl4_src)
@@ -2648,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
        }
 
        if (res.type == RTN_LOCAL) {
-               if (!fl.fl4_src)
-                       fl.fl4_src = fl.fl4_dst;
+               if (!fl.fl4_src) {
+                       if (res.fi->fib_prefsrc)
+                               fl.fl4_src = res.fi->fib_prefsrc;
+                       else
+                               fl.fl4_src = fl.fl4_dst;
+               }
                dev_out = net->loopback_dev;
                fl.oif = dev_out->ifindex;
                res.fi = NULL;
index e13da6de1fc79e26dc6d03b671e1becf9e6c8754..d978bb2f748b34d99efb0162e4e083a460275630 100644 (file)
@@ -2030,7 +2030,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 get_req:
                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
                }
-               sk        = sk_next(st->syn_wait_sk);
+               sk        = sk_nulls_next(st->syn_wait_sk);
                st->state = TCP_SEQ_STATE_LISTENING;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        } else {
@@ -2039,7 +2039,7 @@ get_req:
                if (reqsk_queue_len(&icsk->icsk_accept_queue))
                        goto start_req;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               sk = sk_next(sk);
+               sk = sk_nulls_next(sk);
        }
 get_sk:
        sk_nulls_for_each_from(sk, node) {
index 5e0a3a582a59a05466468b244371dc5675bedc67..2d3ded4d078684298f9116f5c225f92b8908120e 100644 (file)
@@ -1899,6 +1899,7 @@ struct proto udp_prot = {
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udp_prot);
 
index ab76aa928fa98d6f1b8a8f6371d2b898ebdccb24..aee9963f7f5a497efc06429d3ab730e9f3efc999 100644 (file)
@@ -57,6 +57,7 @@ struct proto  udplite_prot = {
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
index 93b7a933a7758254ed70ed849d333ad4d19114a7..848b355910424e75d42df6b9218eae3824ee213c 100644 (file)
@@ -2669,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
        ASSERT_RTNL();
 
-       rt6_ifdown(net, dev);
+       /* Flush routes if device is being removed or it is not loopback */
+       if (how || !(dev->flags & IFF_LOOPBACK))
+               rt6_ifdown(net, dev);
        neigh_ifdown(&nd_tbl, dev);
 
        idev = __in6_dev_get(dev);
index 99157b4cd56e2fa619939a17aa5b716680fc8799..94b5bf132b2e33a467f662b8c0e402a459f00a3d 100644 (file)
@@ -56,7 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 int __ip6_local_out(struct sk_buff *skb)
 {
@@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
        return -EINVAL;
 }
 
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
-{
-       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
-
-       return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
-              skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
-}
-
 static int ip6_finish_output(struct sk_buff *skb)
 {
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
@@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
        return offset;
 }
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
index 96455ffb76fb8b92aa90c3a711aa6635d45b91fa..7659d6f16e6bae4b7e8a1701161181f627c0e41f 100644 (file)
@@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
 {
        struct rt6_info *rt, *nrt;
        int allfrag = 0;
-
+again:
        rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
        if (rt == NULL)
                return;
 
+       if (rt6_check_expired(rt)) {
+               ip6_del_rt(rt);
+               goto again;
+       }
+
        if (pmtu >= dst_mtu(&rt->dst))
                goto out;
 
index 91def93bec85060e7571218c20439cabd4ec824f..cd6cb7c3e5636e5911a36b5e1b4ebb4cb96b4be4 100644 (file)
@@ -1477,6 +1477,7 @@ struct proto udpv6_prot = {
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udpv6_protosw = {
index 5f48fadc27f7a62aa13632fc31bea73e1d1e6aec..986c4de5292eedf715b25b8bae98ee640917d97e 100644 (file)
@@ -55,6 +55,7 @@ struct proto udplitev6_prot = {
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udplite6_protosw = {
index 6434bd5ce0885ad20408a2f79eb198b8f30c269a..8e688b3de9abc62ec03c7dd90b33c951791c5587 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/xfrm.h>
 
 int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb)
        return xfrm_output(skb);
 }
 
+static int __xfrm6_output(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct xfrm_state *x = dst->xfrm;
+
+       if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
+           ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+               dst_allfrag(skb_dst(skb)))) {
+                       return ip6_fragment(skb, xfrm6_output_finish);
+       }
+       return xfrm6_output_finish(skb);
+}
+
 int xfrm6_output(struct sk_buff *skb)
 {
        return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
-                      skb_dst(skb)->dev, xfrm6_output_finish);
+                      skb_dst(skb)->dev, __xfrm6_output);
 }
index a6de3059746ddd3f3039d564f20acf394225e3ad..c9890e25cd4c9681b14f838976a97ed4db560ac7 100644 (file)
@@ -2280,6 +2280,16 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
 
        switch (optname) {
        case IRLMP_ENUMDEVICES:
+
+               /* Offset to first device entry */
+               offset = sizeof(struct irda_device_list) -
+                       sizeof(struct irda_device_info);
+
+               if (len < offset) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
                /* Ask lmp for the current discovery log */
                discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
                                                    self->nslots);
@@ -2290,15 +2300,9 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
                }
 
                /* Write total list length back to client */
-               if (copy_to_user(optval, &list,
-                                sizeof(struct irda_device_list) -
-                                sizeof(struct irda_device_info)))
+               if (copy_to_user(optval, &list, offset))
                        err = -EFAULT;
 
-               /* Offset to first device entry */
-               offset = sizeof(struct irda_device_list) -
-                       sizeof(struct irda_device_info);
-
                /* Copy the list itself - watch for overflow */
                if (list.len > 2048) {
                        err = -EINVAL;
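
The IRLMP_ENUMDEVICES hunk computes the header offset up front and rejects user buffers shorter than that header, closing a window where copy_to_user() could write more than the caller's len allowed. A minimal sketch of checking the destination size before copying a variable-length reply, using hypothetical names:

    #include <errno.h>
    #include <stddef.h>
    #include <string.h>

    struct reply_hdr { unsigned int count; };

    /* Copy a header into a caller-supplied buffer only if it fits. */
    static int copy_reply(void *dst, size_t dst_len,
                          const struct reply_hdr *hdr)
    {
            if (dst_len < sizeof(*hdr))
                    return -EINVAL;     /* caller's buffer too small */
            memcpy(dst, hdr, sizeof(*hdr));
            return 0;
    }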
index 239c4836a946601f27c69f965ae1a0935e71b434..077a93dd1671a841b9db85fe4cf6981aa90c1edf 100644 (file)
@@ -780,6 +780,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
        mutex_lock(&sdata->u.ibss.mtx);
 
+       if (!sdata->u.ibss.ssid_len)
+               goto mgmt_out; /* not ready to merge yet */
+
        switch (fc & IEEE80211_FCTL_STYPE) {
        case IEEE80211_STYPE_PROBE_REQ:
                ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
@@ -797,6 +800,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
                break;
        }
 
+ mgmt_out:
        mutex_unlock(&sdata->u.ibss.mtx);
 }
 
index 54fb4a0e76f03d835c6038cffeabf18874cb2d59..b01e467b76c69f15aa8de00434deca874b89ae48 100644 (file)
@@ -1788,9 +1788,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
                        fwd_skb = skb_copy(skb, GFP_ATOMIC);
 
-                       if (!fwd_skb && net_ratelimit())
+                       if (!fwd_skb && net_ratelimit()) {
                                printk(KERN_DEBUG "%s: failed to clone mesh frame\n",
                                                   sdata->name);
+                               goto out;
+                       }
 
                        fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
                        memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
@@ -1828,6 +1830,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
                }
        }
 
+ out:
        if (is_multicast_ether_addr(hdr->addr1) ||
            sdata->dev->flags & IFF_PROMISC)
                return RX_CONTINUE;
index ae344d1ba0560480665e1b939b15b058f5fddc4c..146097cb43a710d756ab997770faa812293ba85e 100644 (file)
@@ -1051,11 +1051,13 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_work *wk;
+       bool cleanup = false;
 
        mutex_lock(&local->mtx);
        list_for_each_entry(wk, &local->work_list, list) {
                if (wk->sdata != sdata)
                        continue;
+               cleanup = true;
                wk->type = IEEE80211_WORK_ABORT;
                wk->started = true;
                wk->timeout = jiffies;
@@ -1063,7 +1065,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
        mutex_unlock(&local->mtx);
 
        /* run cleanups etc. */
-       ieee80211_work_work(&local->work_work);
+       if (cleanup)
+               ieee80211_work_work(&local->work_work);
 
        mutex_lock(&local->mtx);
        list_for_each_entry(wk, &local->work_list, list) {
index 3cf478d012dd4f29b7a67f951c2cfa901b64248f..7150705f1d0b8b7aa12de87067928c25605ffbbb 100644 (file)
@@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch)
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                d = q->next[q->tail];
                q->next[q->tail] = q->next[d];
-               q->allot[q->next[d]] += q->quantum;
                skb = q->qs[d].prev;
                len = qdisc_pkt_len(skb);
                __skb_unlink(skb, &q->qs[d]);
@@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
-                       q->tail = x;
                        q->next[x] = x;
-                       q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
-                       q->tail = x;
                }
+               q->tail = x;
+               q->allot[x] = q->quantum;
        }
        if (++sch->q.qlen <= q->limit) {
                sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch)
 {
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
-       sfq_index a, old_a;
+       sfq_index a, next_a;
 
        /* No active slots */
        if (q->tail == SFQ_DEPTH)
                return NULL;
 
-       a = old_a = q->next[q->tail];
+       a = q->next[q->tail];
 
        /* Grab packet */
        skb = __skb_dequeue(&q->qs[a]);
@@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch)
        /* Is the slot empty? */
        if (q->qs[a].qlen == 0) {
                q->ht[q->hash[a]] = SFQ_DEPTH;
-               a = q->next[a];
-               if (a == old_a) {
+               next_a = q->next[a];
+               if (a == next_a) {
                        q->tail = SFQ_DEPTH;
                        return skb;
                }
-               q->next[q->tail] = a;
-               q->allot[a] += q->quantum;
+               q->next[q->tail] = next_a;
        } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-               q->tail = a;
-               a = q->next[a];
                q->allot[a] += q->quantum;
+               q->tail = a;
        }
        return skb;
 }
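
The SFQ changes make allot handling uniform: a flow always receives q->quantum when it becomes active, and a flow that exhausts its allotment is recharged and rotated to the tail, replacing the older asymmetric bookkeeping that could credit a neighbouring slot on drop. A compact sketch of the credit-and-rotate idea (a deficit-round-robin core, not the kernel's slot arrays):

    #define QUANTUM 1500

    struct flow {
            int          allot;     /* remaining byte credit this round */
            struct flow *next;      /* circular list, tail->next == head */
    };

    /* Serve the head flow; recharge and rotate when its credit runs
     * out.  Returns the flow that transmitted. */
    static struct flow *serve(struct flow **tail, int pkt_len)
    {
            struct flow *f = (*tail)->next;    /* head of the ring */

            f->allot -= pkt_len;
            if (f->allot <= 0) {
                    f->allot += QUANTUM;       /* recharge for next round */
                    *tail = f;                 /* rotate: f becomes tail */
            }
            return f;
    }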
index 0b9ee34ad35ceb31ff764baf9cec73fba010f55b..fff0926b11112a8dd04d11e1e7f9ba378c40fcc3 100644 (file)
@@ -5053,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
        if (copy_to_user(optval, &val, len))
                return -EFAULT;
 
-       return -ENOTSUPP;
+       return 0;
 }
 
 /*
index 5ad25e17b6cb2782a2101b59ad7cfd442a8af2ea..4eb99ab34053769f5b2b644594427b2bdc108c82 100644 (file)
@@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT
 # The empty.o file is created in the make process in order to determine
 #  the target endianness and word size. It is made before all other C
 #  files, including recordmcount.
-cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then                 \
-                       $(objtree)/scripts/recordmcount "$(@)";                 \
-                   fi;
+sub_cmd_record_mcount =                                        \
+       if [ $(@) != "scripts/mod/empty.o" ]; then      \
+               $(objtree)/scripts/recordmcount "$(@)"; \
+       fi;
 else
-cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
        "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
        "$(if $(CONFIG_64BIT),64,32)" \
        "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
        "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
        "$(if $(part-of-module),1,0)" "$(@)";
 endif
+cmd_record_mcount =                                            \
+       if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then    \
+               $(sub_cmd_record_mcount)                        \
+       fi;
 endif
 
 define rule_cc_o_c
index b9d9aa18e6d62bbaa40c34cf7062c00c8bb15e35..5f77dcb8977e0632b5875167d5a652ef62f4be1c 100644 (file)
@@ -140,6 +140,20 @@ struct property *menu_add_prop(enum prop_type type, char *prompt, struct expr *e
                }
                if (current_entry->prompt && current_entry != &rootmenu)
                        prop_warn(prop, "prompt redefined");
+
+               /* Apply all upper menus' visibilities to actual prompts. */
+               if (type == P_PROMPT) {
+                       struct menu *menu = current_entry;
+
+                       while ((menu = menu->parent) != NULL) {
+                               if (!menu->visibility)
+                                       continue;
+                               prop->visible.expr
+                                       = expr_alloc_and(prop->visible.expr,
+                                                        menu->visibility);
+                       }
+               }
+
                current_entry->prompt = prop;
        }
        prop->text = prompt;
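
menu_add_prop() now walks up the menu tree and ANDs each ancestor's visibility expression into the prompt, so a prompt inherits every enclosing menu's "visible if" condition. A sketch of the same parent-chain fold with a toy condition type (names are illustrative):

    #include <stdbool.h>
    #include <stddef.h>

    struct menu_node {
            struct menu_node *parent;
            bool              visible;   /* stand-in for an expr tree */
    };

    /* Fold all ancestors' visibility into one condition, mirroring
     * the expr_alloc_and() loop in the patch above. */
    static bool effective_visibility(const struct menu_node *m, bool own)
    {
            bool v = own;

            for (m = m->parent; m; m = m->parent)
                    v = v && m->visible;     /* AND in each upper menu */
            return v;
    }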
index 39580a5dc5df6083a5766ae8853c6610e417474f..9f85012acf0d2fb749a4ded8bb962d514d524c03 100755 (executable)
@@ -155,6 +155,8 @@ use strict;
 # '@parameter' - name of a parameter
 # '%CONST' - name of a constant.
 
+## initialize global data
+
 my $errors = 0;
 my $warnings = 0;
 my $anon_struct_union = 0;
@@ -218,21 +220,14 @@ my %highlights_list = ( $type_constant, "\$1",
                        $type_param, "\$1" );
 my $blankline_list = "";
 
-sub usage {
-    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
-    print "         [ -no-doc-sections ]\n";
-    print "         [ -function funcname [ -function funcname ...] ]\n";
-    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
-    print "         c source file(s) > outputfile\n";
-    print "         -v : verbose output, more warnings & other info listed\n";
-    exit 1;
-}
-
 # read arguments
 if ($#ARGV == -1) {
     usage();
 }
 
+my $kernelversion;
+my $dohighlight = "";
+
 my $verbose = 0;
 my $output_mode = "man";
 my $no_doc_sections = 0;
@@ -245,7 +240,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
                'November', 'December')[(localtime)[4]] .
   " " . ((localtime)[5]+1900);
 
-# Essentially these are globals
+# Essentially these are globals.
 # They probably want to be tidied up, made more localised or something.
 # CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
 # could cause "use of undefined value" or other bugs.
@@ -353,6 +348,18 @@ while ($ARGV[0] =~ m/^-(.*)/) {
     }
 }
 
+# main execution continues near EOF
+
+sub usage {
+    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
+    print "         [ -no-doc-sections ]\n";
+    print "         [ -function funcname [ -function funcname ...] ]\n";
+    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
+    print "         c source file(s) > outputfile\n";
+    print "         -v : verbose output, more warnings & other info listed\n";
+    exit 1;
+}
+
 # get kernel version from env
 sub get_kernel_version() {
     my $version = 'unknown kernel version';
@@ -362,15 +369,6 @@ sub get_kernel_version() {
     }
     return $version;
 }
-my $kernelversion = get_kernel_version();
-
-# generate a sequence of code that will splice in highlighting information
-# using the s// operator.
-my $dohighlight = "";
-foreach my $pattern (keys %highlights) {
-#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
-    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
-}
 
 ##
 # dumps section contents to arrays/hashes intended for that purpose.
@@ -1851,34 +1849,6 @@ sub dump_function($$) {
                       });
 }
 
-sub process_file($);
-
-# Read the file that maps relative names to absolute names for
-# separate source and object directories and for shadow trees.
-if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
-       my ($relname, $absname);
-       while(<SOURCE_MAP>) {
-               chop();
-               ($relname, $absname) = (split())[0..1];
-               $relname =~ s:^/+::;
-               $source_map{$relname} = $absname;
-       }
-       close(SOURCE_MAP);
-}
-
-foreach (@ARGV) {
-    chomp;
-    process_file($_);
-}
-if ($verbose && $errors) {
-  print STDERR "$errors errors\n";
-}
-if ($verbose && $warnings) {
-  print STDERR "$warnings warnings\n";
-}
-
-exit($errors);
-
 sub reset_state {
     $function = "";
     %constants = ();
@@ -2285,3 +2255,39 @@ sub process_file($) {
        }
     }
 }
+
+
+$kernelversion = get_kernel_version();
+
+# generate a sequence of code that will splice in highlighting information
+# using the s// operator.
+foreach my $pattern (keys %highlights) {
+#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
+    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
+}
+
+# Read the file that maps relative names to absolute names for
+# separate source and object directories and for shadow trees.
+if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
+       my ($relname, $absname);
+       while(<SOURCE_MAP>) {
+               chop();
+               ($relname, $absname) = (split())[0..1];
+               $relname =~ s:^/+::;
+               $source_map{$relname} = $absname;
+       }
+       close(SOURCE_MAP);
+}
+
+foreach (@ARGV) {
+    chomp;
+    process_file($_);
+}
+if ($verbose && $errors) {
+  print STDERR "$errors errors\n";
+}
+if ($verbose && $warnings) {
+  print STDERR "$warnings warnings\n";
+}
+
+exit($errors);
index aef8c0a923ab8e2276f97e1fe2aa61b7748e25e4..d661afbe474c2fce8f74907869be0a644c5789c9 100644 (file)
@@ -253,6 +253,8 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
        result = security_filter_rule_init(entry->lsm[lsm_rule].type,
                                           Audit_equal, args,
                                           &entry->lsm[lsm_rule].rule);
+       if (!entry->lsm[lsm_rule].rule)
+               return -EINVAL;
        return result;
 }
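
security_filter_rule_init() can return 0 yet leave the rule pointer NULL (for instance when the configured LSM provides no filtering), so the patch checks the output pointer as well as the return code. A sketch of treating a NULL result as an error even on a zero return, under assumed names:

    #include <errno.h>
    #include <stddef.h>

    /* Hypothetical init hook: may "succeed" without producing a rule. */
    extern int rule_init(void **rule_out);

    static int init_rule_checked(void **rule_out)
    {
            int result = rule_init(rule_out);

            if (result)
                    return result;      /* hard failure */
            if (!*rule_out)
                    return -EINVAL;     /* soft failure: no rule made */
            return 0;
    }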
 
index 0088dd8bf68a77b29637527733ff0b8985b8ecb7..0ea52d25a6bda8b3568fdb7752539403e93086b6 100644 (file)
@@ -403,7 +403,6 @@ link_check_failed:
        return ret;
 
 link_prealloc_failed:
-       up_write(&dest_keyring->sem);
        mutex_unlock(&user->cons_lock);
        kleave(" = %d [prelink]", ret);
        return ret;
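
The link_prealloc_failed path released dest_keyring->sem a second time, apparently because the failing callee only returns with the semaphore held on success; the patch deletes the unbalanced up_write(). A small sketch of that locking contract with POSIX locks (illustrative, not the kernel rwsem API):

    #include <pthread.h>

    static pthread_rwlock_t sem = PTHREAD_RWLOCK_INITIALIZER;

    /* Acquire the lock and do setup; on failure, release before
     * returning, so the caller never sees the lock held after an
     * error. */
    static int link_begin(void)
    {
            pthread_rwlock_wrlock(&sem);
            if (/* setup fails */ 0) {
                    pthread_rwlock_unlock(&sem);
                    return -1;
            }
            return 0;                  /* success: lock is held */
    }

    static int do_link(void)
    {
            if (link_begin() < 0)
                    return -1;         /* lock NOT held: no unlock here */
            /* ... linking work under the lock ... */
            pthread_rwlock_unlock(&sem);
            return 0;
    }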
index b75db8e9cc0f36fff03d0ecf1884b23b3bad724b..11446a1506dad244b2d11c447e6dc39e906427f1 100644 (file)
@@ -1070,8 +1070,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
                struct snd_pcm_hw_rule *new;
                unsigned int new_rules = constrs->rules_all + 16;
                new = kcalloc(new_rules, sizeof(*c), GFP_KERNEL);
-               if (!new)
+               if (!new) {
+                       va_end(args);
                        return -ENOMEM;
+               }
                if (constrs->rules) {
                        memcpy(new, constrs->rules,
                               constrs->rules_num * sizeof(*c));
@@ -1087,8 +1089,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
        c->private = private;
        k = 0;
        while (1) {
-               if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps)))
+               if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps))) {
+                       va_end(args);
                        return -EINVAL;
+               }
                c->deps[k++] = dep;
                if (dep < 0)
                        break;
@@ -1097,7 +1101,7 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
        constrs->rules_num++;
        va_end(args);
        return 0;
-}                                  
+}
 
 EXPORT_SYMBOL(snd_pcm_hw_rule_add);
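
snd_pcm_hw_rule_add() is a varargs function, and the new early returns call va_end(args) before bailing out; C requires a matching va_end() for every va_start() on every exit path. A minimal standalone sketch of the pattern:

    #include <stdarg.h>

    /* Sum ints until a negative sentinel; returns -1 on too many
     * arguments.  Every return path ends the va_list it started. */
    static int sum_until_negative(int first, ...)
    {
            va_list args;
            int v = first, total = 0, n = 0;

            va_start(args, first);
            while (v >= 0) {
                    if (++n > 16) {
                            va_end(args);   /* clean up on error path too */
                            return -1;
                    }
                    total += v;
                    v = va_arg(args, int);
            }
            va_end(args);
            return total;
    }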
 
index 46c0d03dbecced68ca318dbbe20df52d1da4ab0b..fcb14a09982262d473842aeb55f365b031ebeb70 100644 (file)
@@ -87,7 +87,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
        int             i, n;
 
        for (i = 0; i < num_mixer_volumes; i++) {
-               if (strcmp(name, mixer_vols[i].name) == 0) {
+               if (strncmp(name, mixer_vols[i].name, 32) == 0) {
                        if (present)
                                mixer_vols[i].num = i;
                        return mixer_vols[i].levels;
@@ -99,7 +99,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
        }
        n = num_mixer_volumes++;
 
-       strcpy(mixer_vols[n].name, name);
+       strncpy(mixer_vols[n].name, name, 32);
 
        if (present)
                mixer_vols[n].num = n;
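
load_mixer_volumes() switches to strncmp()/strncpy() bounded by the 32-byte size of mixer_vols[].name, so an unterminated or oversized name from userspace can no longer run past the array. One caveat worth noting: strncpy() does not NUL-terminate when the source fills the buffer, so a sketch that wants a printable string should terminate explicitly:

    #include <string.h>

    #define NAME_LEN 32

    struct mixer_entry { char name[NAME_LEN]; };

    static void set_name(struct mixer_entry *e, const char *src)
    {
            strncpy(e->name, src, NAME_LEN);
            e->name[NAME_LEN - 1] = '\0';   /* strncpy may omit the NUL */
    }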
index 644e3f14f8ca5aea7af2e465450caab86d2f90cb..98b6d02a36c9c0600a7b26dffbbf1f4f378fccd8 100644 (file)
@@ -1919,6 +1919,16 @@ struct snd_kcontrol *snd_hda_find_mixer_ctl(struct hda_codec *codec,
 }
 EXPORT_SYMBOL_HDA(snd_hda_find_mixer_ctl);
 
+static int find_empty_mixer_ctl_idx(struct hda_codec *codec, const char *name)
+{
+       int idx;
+       for (idx = 0; idx < 16; idx++) { /* 16 controls should be large enough */
+               if (!_snd_hda_find_mixer_ctl(codec, name, idx))
+                       return idx;
+       }
+       return -EBUSY;
+}
+
 /**
  * snd_hda_ctl_add - Add a control element and assign to the codec
  * @codec: HD-audio codec
@@ -2654,8 +2664,6 @@ static struct snd_kcontrol_new dig_mixes[] = {
        { } /* end */
 };
 
-#define SPDIF_MAX_IDX  4       /* 4 instances should be enough to probe */
-
 /**
  * snd_hda_create_spdif_out_ctls - create Output SPDIF-related controls
  * @codec: the HDA codec
@@ -2673,12 +2681,8 @@ int snd_hda_create_spdif_out_ctls(struct hda_codec *codec, hda_nid_t nid)
        struct snd_kcontrol_new *dig_mix;
        int idx;
 
-       for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-               if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Playback Switch",
-                                            idx))
-                       break;
-       }
-       if (idx >= SPDIF_MAX_IDX) {
+       idx = find_empty_mixer_ctl_idx(codec, "IEC958 Playback Switch");
+       if (idx < 0) {
                printk(KERN_ERR "hda_codec: too many IEC958 outputs\n");
                return -EBUSY;
        }
@@ -2829,12 +2833,8 @@ int snd_hda_create_spdif_in_ctls(struct hda_codec *codec, hda_nid_t nid)
        struct snd_kcontrol_new *dig_mix;
        int idx;
 
-       for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-               if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Capture Switch",
-                                            idx))
-                       break;
-       }
-       if (idx >= SPDIF_MAX_IDX) {
+       idx = find_empty_mixer_ctl_idx(codec, "IEC958 Capture Switch");
+       if (idx < 0) {
                printk(KERN_ERR "hda_codec: too many IEC958 inputs\n");
                return -EBUSY;
        }
@@ -3808,21 +3808,32 @@ int snd_hda_add_new_ctls(struct hda_codec *codec, struct snd_kcontrol_new *knew)
 
        for (; knew->name; knew++) {
                struct snd_kcontrol *kctl;
+               int addr = 0, idx = 0;
                if (knew->iface == -1)  /* skip this codec private value */
                        continue;
-               kctl = snd_ctl_new1(knew, codec);
-               if (!kctl)
-                       return -ENOMEM;
-               err = snd_hda_ctl_add(codec, 0, kctl);
-               if (err < 0) {
-                       if (!codec->addr)
-                               return err;
+               for (;;) {
                        kctl = snd_ctl_new1(knew, codec);
                        if (!kctl)
                                return -ENOMEM;
-                       kctl->id.device = codec->addr;
+                       if (addr > 0)
+                               kctl->id.device = addr;
+                       if (idx > 0)
+                               kctl->id.index = idx;
                        err = snd_hda_ctl_add(codec, 0, kctl);
-                       if (err < 0)
+                       if (!err)
+                               break;
+                       /* try first with another device index corresponding to
+                        * the codec addr; if it still fails (or it's the
+                        * primary codec), then try another control index
+                        */
+                       if (!addr && codec->addr)
+                               addr = codec->addr;
+                       else if (!idx && !knew->index) {
+                               idx = find_empty_mixer_ctl_idx(codec,
+                                                              knew->name);
+                               if (idx <= 0)
+                                       return err;
+                       } else
                                return err;
                }
        }
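
snd_hda_add_new_ctls() now retries a failed control registration twice: first with the device field set to the codec address, then with the first free control index found by find_empty_mixer_ctl_idx(), instead of the old single retry. A sketch of the probe-for-a-free-slot loop, with a hypothetical existence check:

    #include <errno.h>
    #include <stdbool.h>

    #define MAX_IDX 16

    /* Assumed predicate: is (name, idx) already registered? */
    extern bool ctl_exists(const char *name, int idx);

    /* Return the first unused index for a control name, or -EBUSY. */
    static int find_free_idx(const char *name)
    {
            int idx;

            for (idx = 0; idx < MAX_IDX; idx++)
                    if (!ctl_exists(name, idx))
                            return idx;
            return -EBUSY;
    }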
index b030c8eba21fdc618fe0b5bfb32cf5b156c39bba..a1c4008af89185449567331db04014a31b87007c 100644 (file)
@@ -2300,6 +2300,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
        SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB),
+       SND_PCI_QUIRK(0x1028, 0x0470, "Dell Inspiron 1120", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
index 427da45d7906560f517e04de74c84ab971c0a50a..552a09e9211ff355c185314339d48b99d5a0cb65 100644 (file)
@@ -14806,8 +14806,9 @@ static int alc269_resume(struct hda_codec *codec)
 
 enum {
        ALC269_FIXUP_SONY_VAIO,
+       ALC275_FIX_SONY_VAIO_GPIO2,
        ALC269_FIXUP_DELL_M101Z,
-       ALC269_FIXUP_LENOVO_EDGE14,
+       ALC269_FIXUP_SKU_IGNORE,
        ALC269_FIXUP_ASUS_G73JW,
 };
 
@@ -14818,6 +14819,14 @@ static const struct alc_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC275_FIX_SONY_VAIO_GPIO2] = {
+               .verbs = (const struct hda_verb[]) {
+                       {0x01, AC_VERB_SET_GPIO_MASK, 0x04},
+                       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04},
+                       {0x01, AC_VERB_SET_GPIO_DATA, 0x00},
+                       { }
+               }
+       },
        [ALC269_FIXUP_DELL_M101Z] = {
                .verbs = (const struct hda_verb[]) {
                        /* Enables internal speaker */
@@ -14826,7 +14835,7 @@ static const struct alc_fixup alc269_fixups[] = {
                        {}
                }
        },
-       [ALC269_FIXUP_LENOVO_EDGE14] = {
+       [ALC269_FIXUP_SKU_IGNORE] = {
                .sku = ALC_FIXUP_SKU_IGNORE,
        },
        [ALC269_FIXUP_ASUS_G73JW] = {
@@ -14838,9 +14847,13 @@ static const struct alc_fixup alc269_fixups[] = {
 };
 
 static struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+       SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+       SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
        SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
        SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
-       SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_LENOVO_EDGE14),
+       SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
+       SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        {}
 };
@@ -15091,28 +15104,29 @@ static int patch_alc269(struct hda_codec *codec)
 
        alc_auto_parse_customize_define(codec);
 
-       coef = alc_read_coef_idx(codec, 0);
-       if ((coef & 0x00f0) == 0x0010) {
-               if (codec->bus->pci->subsystem_vendor == 0x1025 &&
-                   spec->cdefine.platform_type == 1) {
-                       alc_codec_rename(codec, "ALC271X");
-                       spec->codec_variant = ALC269_TYPE_ALC271X;
-               } else if ((coef & 0xf000) == 0x1000) {
-                       spec->codec_variant = ALC269_TYPE_ALC270;
-               } else if ((coef & 0xf000) == 0x2000) {
-                       alc_codec_rename(codec, "ALC259");
-                       spec->codec_variant = ALC269_TYPE_ALC259;
-               } else if ((coef & 0xf000) == 0x3000) {
-                       alc_codec_rename(codec, "ALC258");
-                       spec->codec_variant = ALC269_TYPE_ALC258;
-               } else {
-                       alc_codec_rename(codec, "ALC269VB");
-                       spec->codec_variant = ALC269_TYPE_ALC269VB;
-               }
-       } else
-               alc_fix_pll_init(codec, 0x20, 0x04, 15);
-
-       alc269_fill_coef(codec);
+       if (codec->vendor_id == 0x10ec0269) {
+               coef = alc_read_coef_idx(codec, 0);
+               if ((coef & 0x00f0) == 0x0010) {
+                       if (codec->bus->pci->subsystem_vendor == 0x1025 &&
+                           spec->cdefine.platform_type == 1) {
+                               alc_codec_rename(codec, "ALC271X");
+                               spec->codec_variant = ALC269_TYPE_ALC271X;
+                       } else if ((coef & 0xf000) == 0x1000) {
+                               spec->codec_variant = ALC269_TYPE_ALC270;
+                       } else if ((coef & 0xf000) == 0x2000) {
+                               alc_codec_rename(codec, "ALC259");
+                               spec->codec_variant = ALC269_TYPE_ALC259;
+                       } else if ((coef & 0xf000) == 0x3000) {
+                               alc_codec_rename(codec, "ALC258");
+                               spec->codec_variant = ALC269_TYPE_ALC258;
+                       } else {
+                               alc_codec_rename(codec, "ALC269VB");
+                               spec->codec_variant = ALC269_TYPE_ALC269VB;
+                       }
+               } else
+                       alc_fix_pll_init(codec, 0x20, 0x04, 15);
+               alc269_fill_coef(codec);
+       }
 
        board_config = snd_hda_check_board_config(codec, ALC269_MODEL_LAST,
                                                  alc269_models,
index efa4225f5fd6c95f9f0218d38002d1ca12d88bd3..f03b2ff90496f86151b01cec6131e9cc94996a42 100644 (file)
@@ -3481,6 +3481,8 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
 
                label = hda_get_input_pin_label(codec, nid, 1);
                snd_hda_add_imux_item(dimux, label, index, &type_idx);
+               if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
+                       snd_hda_add_imux_item(imux, label, index, &type_idx);
 
                err = create_elem_capture_vol(codec, nid, label, type_idx,
                                              HDA_INPUT);
@@ -3492,9 +3494,6 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
                        if (err < 0)
                                return err;
                }
-
-               if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
-                       snd_hda_add_imux_item(imux, label, index, NULL);
        }
 
        return 0;
index d63e28773eb1841ac7bdd06af0d37a0d5c03013b..6447dbb2f1238f1288c6ed6a797757a08ddd69fa 100644 (file)
@@ -40,7 +40,6 @@ struct max98088_cdata {
 };
 
 struct max98088_priv {
-       u8 reg_cache[M98088_REG_CNT];
        enum max98088_type devtype;
        void *control_data;
        struct max98088_pdata *pdata;
@@ -1588,7 +1587,7 @@ static int max98088_dai2_set_fmt(struct snd_soc_dai *codec_dai,
 
 static void max98088_sync_cache(struct snd_soc_codec *codec)
 {
-       struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -1599,14 +1598,14 @@ static void max98088_sync_cache(struct snd_soc_codec *codec)
        /* write back cached values if they're writeable and
         * different from the hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(max98088->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (!max98088_access[i].writable)
                        continue;
 
-               if (max98088->reg_cache[i] == max98088_reg[i])
+               if (reg_cache[i] == max98088_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, max98088->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -1951,7 +1950,6 @@ static int max98088_probe(struct snd_soc_codec *codec)
        int ret = 0;
 
        codec->cache_sync = 1;
-       memcpy(codec->reg_cache, max98088_reg, sizeof(max98088_reg));
 
        ret = snd_soc_codec_set_cache_io(codec, 8, 8, SND_SOC_I2C);
        if (ret != 0) {
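
The max98088 change (like the wm8523 and wm8741 hunks below it) drops the driver's private reg_cache[] copy and uses the cache the ASoC core allocates, addressed through codec->reg_cache and sized by the driver's reg_cache_size, leaving a single authoritative cache. A sketch of resyncing such a shared cache to the hardware, with assumed names:

    #include <stddef.h>
    #include <stdint.h>

    /* Assumed hardware write hook and defaults table. */
    extern int hw_write_reg(unsigned int reg, uint16_t val);
    extern const uint16_t reg_default[];

    /* Write back every cached value that differs from the hardware
     * default, reading straight from the framework-owned cache. */
    static void sync_cache(const uint16_t *reg_cache, size_t nregs)
    {
            size_t i;

            for (i = 1; i < nregs; i++) {   /* reg 0 is often reset */
                    if (reg_cache[i] == reg_default[i])
                            continue;
                    hw_write_reg(i, reg_cache[i]);
            }
    }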
index 9a433a5396cb781727e39a4c1c6005f1bbc9a421..deca79ea2b4b98ab6d3232c9cb3c1b4c3e6858dd 100644 (file)
@@ -41,7 +41,6 @@ static const char *wm8523_supply_names[WM8523_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8523_priv {
        enum snd_soc_control_type control_type;
-       u16 reg_cache[WM8523_REGISTER_COUNT];
        struct regulator_bulk_data supplies[WM8523_NUM_SUPPLIES];
        unsigned int sysclk;
        unsigned int rate_constraint_list[WM8523_NUM_RATES];
@@ -314,6 +313,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
        struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        switch (level) {
@@ -344,7 +344,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
                        /* Sync back default/cached values */
                        for (i = WM8523_AIF_CTRL1;
                             i < WM8523_MAX_REGISTER; i++)
-                               snd_soc_write(codec, i, wm8523->reg_cache[i]);
+                               snd_soc_write(codec, i, reg_cache[i]);
 
 
                        msleep(100);
@@ -414,6 +414,7 @@ static int wm8523_resume(struct snd_soc_codec *codec)
 static int wm8523_probe(struct snd_soc_codec *codec)
 {
        struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        codec->hw_write = (hw_write_t)i2c_master_send;
@@ -470,8 +471,8 @@ static int wm8523_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8523->reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
-       wm8523->reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
+       reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
+       reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
 
        wm8523_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
index 90e31e9aa6f7c66346c8360a3c1ab900519e4a98..aea60ef8aba73da14e7cd17f62b72955adfd645a 100644 (file)
@@ -41,7 +41,6 @@ static const char *wm8741_supply_names[WM8741_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8741_priv {
        enum snd_soc_control_type control_type;
-       u16 reg_cache[WM8741_REGISTER_COUNT];
        struct regulator_bulk_data supplies[WM8741_NUM_SUPPLIES];
        unsigned int sysclk;
        struct snd_pcm_hw_constraint_list *sysclk_constraints;
@@ -422,6 +421,7 @@ static int wm8741_resume(struct snd_soc_codec *codec)
 static int wm8741_probe(struct snd_soc_codec *codec)
 {
        struct wm8741_priv *wm8741 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret = 0;
 
        ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8741->control_type);
@@ -437,10 +437,10 @@ static int wm8741_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU */
-       wm8741->reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
-       wm8741->reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
-       wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
-       wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM;
+       reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
+       reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
+       reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
+       reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM;
 
        snd_soc_add_controls(codec, wm8741_snd_controls,
                             ARRAY_SIZE(wm8741_snd_controls));
index 8f679a13f2bcaae23653adaffafff6fedcd8e66f..87caae59e939c78465750f470d5d4ce35ee505d3 100644 (file)
@@ -65,22 +65,22 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
  * are using 2 wire for device control, so we cache them instead.
  */
 static const u16 wm8753_reg[] = {
-       0x0008, 0x0000, 0x000a, 0x000a,
-       0x0033, 0x0000, 0x0007, 0x00ff,
-       0x00ff, 0x000f, 0x000f, 0x007b,
-       0x0000, 0x0032, 0x0000, 0x00c3,
-       0x00c3, 0x00c0, 0x0000, 0x0000,
+       0x0000, 0x0008, 0x0000, 0x000a,
+       0x000a, 0x0033, 0x0000, 0x0007,
+       0x00ff, 0x00ff, 0x000f, 0x000f,
+       0x007b, 0x0000, 0x0032, 0x0000,
+       0x00c3, 0x00c3, 0x00c0, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000,
-       0x0000, 0x0000, 0x0000, 0x0055,
-       0x0005, 0x0050, 0x0055, 0x0050,
-       0x0055, 0x0050, 0x0055, 0x0079,
-       0x0079, 0x0079, 0x0079, 0x0079,
        0x0000, 0x0000, 0x0000, 0x0000,
-       0x0097, 0x0097, 0x0000, 0x0004,
-       0x0000, 0x0083, 0x0024, 0x01ba,
-       0x0000, 0x0083, 0x0024, 0x01ba,
-       0x0000, 0x0000, 0x0000
+       0x0055, 0x0005, 0x0050, 0x0055,
+       0x0050, 0x0055, 0x0050, 0x0055,
+       0x0079, 0x0079, 0x0079, 0x0079,
+       0x0079, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0097, 0x0097, 0x0000,
+       0x0004, 0x0000, 0x0083, 0x0024,
+       0x01ba, 0x0000, 0x0083, 0x0024,
+       0x01ba, 0x0000, 0x0000, 0x0000
 };
 
 /* codec private data */
@@ -88,57 +88,10 @@ struct wm8753_priv {
        enum snd_soc_control_type control_type;
        unsigned int sysclk;
        unsigned int pcmclk;
-       u16 reg_cache[ARRAY_SIZE(wm8753_reg)];
        int dai_func;
 };
 
-/*
- * read wm8753 register cache
- */
-static inline unsigned int wm8753_read_reg_cache(struct snd_soc_codec *codec,
-       unsigned int reg)
-{
-       u16 *cache = codec->reg_cache;
-       if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-               return -1;
-       return cache[reg - 1];
-}
-
-/*
- * write wm8753 register cache
- */
-static inline void wm8753_write_reg_cache(struct snd_soc_codec *codec,
-       unsigned int reg, unsigned int value)
-{
-       u16 *cache = codec->reg_cache;
-       if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-               return;
-       cache[reg - 1] = value;
-}
-
-/*
- * write to the WM8753 register space
- */
-static int wm8753_write(struct snd_soc_codec *codec, unsigned int reg,
-       unsigned int value)
-{
-       u8 data[2];
-
-       /* data is
-        *   D15..D9 WM8753 register offset
-        *   D8...D0 register data
-        */
-       data[0] = (reg << 1) | ((value >> 8) & 0x0001);
-       data[1] = value & 0x00ff;
-
-       wm8753_write_reg_cache(codec, reg, value);
-       if (codec->hw_write(codec->control_data, data, 2) == 2)
-               return 0;
-       else
-               return -EIO;
-}
-
-#define wm8753_reset(c) wm8753_write(c, WM8753_RESET, 0)
+#define wm8753_reset(c) snd_soc_write(c, WM8753_RESET, 0)
 
 /*
  * WM8753 Controls
@@ -218,7 +171,7 @@ static int wm8753_get_dai(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec =  snd_kcontrol_chip(kcontrol);
-       int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+       int mode = snd_soc_read(codec, WM8753_IOCTL);
 
        ucontrol->value.integer.value[0] = (mode & 0xc) >> 2;
        return 0;
@@ -228,7 +181,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec =  snd_kcontrol_chip(kcontrol);
-       int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+       int mode = snd_soc_read(codec, WM8753_IOCTL);
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
        if (((mode & 0xc) >> 2) == ucontrol->value.integer.value[0])
@@ -738,17 +691,17 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
        if (pll_id == WM8753_PLL1) {
                offset = 0;
                enable = 0x10;
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xffef;
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0xffef;
        } else {
                offset = 4;
                enable = 0x8;
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfff7;
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0xfff7;
        }
 
        if (!freq_in || !freq_out) {
                /* disable PLL  */
-               wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
-               wm8753_write(codec, WM8753_CLOCK, reg);
+               snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
+               snd_soc_write(codec, WM8753_CLOCK, reg);
                return 0;
        } else {
                u16 value = 0;
@@ -759,20 +712,20 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
                /* set up N and K PLL divisor ratios */
                /* bits 8:5 = PLL_N, bits 3:0 = PLL_K[21:18] */
                value = (pll_div.n << 5) + ((pll_div.k & 0x3c0000) >> 18);
-               wm8753_write(codec, WM8753_PLL1CTL2 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL2 + offset, value);
 
                /* bits 8:0 = PLL_K[17:9] */
                value = (pll_div.k & 0x03fe00) >> 9;
-               wm8753_write(codec, WM8753_PLL1CTL3 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL3 + offset, value);
 
                /* bits 8:0 = PLL_K[8:0] */
                value = pll_div.k & 0x0001ff;
-               wm8753_write(codec, WM8753_PLL1CTL4 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL4 + offset, value);
 
                /* set PLL as input and enable */
-               wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
+               snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
                        (pll_div.div2 << 3));
-               wm8753_write(codec, WM8753_CLOCK, reg | enable);
+               snd_soc_write(codec, WM8753_CLOCK, reg | enable);
        }
        return 0;
 }
@@ -879,7 +832,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
                unsigned int fmt)
 {
        struct snd_soc_codec *codec = codec_dai->codec;
-       u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01ec;
+       u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01ec;
 
        /* interface format */
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -901,7 +854,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_PCM, voice);
        return 0;
 }
 
@@ -922,8 +875,8 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
        struct snd_soc_pcm_runtime *rtd = substream->private_data;
        struct snd_soc_codec *codec = rtd->codec;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01f3;
-       u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x017f;
+       u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01f3;
+       u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x017f;
 
        /* bit size */
        switch (params_format(params)) {
@@ -943,9 +896,9 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
        /* sample rate */
        if (params_rate(params) * 384 == wm8753->pcmclk)
                srate |= 0x80;
-       wm8753_write(codec, WM8753_SRATE1, srate);
+       snd_soc_write(codec, WM8753_SRATE1, srate);
 
-       wm8753_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_PCM, voice);
        return 0;
 }
 
@@ -958,8 +911,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
        struct snd_soc_codec *codec = codec_dai->codec;
        u16 voice, ioctl;
 
-       voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x011f;
-       ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x015d;
+       voice = snd_soc_read(codec, WM8753_PCM) & 0x011f;
+       ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x015d;
 
        /* set master/slave audio interface */
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1013,8 +966,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_PCM, voice);
-       wm8753_write(codec, WM8753_IOCTL, ioctl);
+       snd_soc_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_IOCTL, ioctl);
        return 0;
 }
 
@@ -1026,16 +979,16 @@ static int wm8753_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
 
        switch (div_id) {
        case WM8753_PCMDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0x003f;
-               wm8753_write(codec, WM8753_CLOCK, reg | div);
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0x003f;
+               snd_soc_write(codec, WM8753_CLOCK, reg | div);
                break;
        case WM8753_BCLKDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x01c7;
-               wm8753_write(codec, WM8753_SRATE2, reg | div);
+               reg = snd_soc_read(codec, WM8753_SRATE2) & 0x01c7;
+               snd_soc_write(codec, WM8753_SRATE2, reg | div);
                break;
        case WM8753_VXCLKDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x003f;
-               wm8753_write(codec, WM8753_SRATE2, reg | div);
+               reg = snd_soc_read(codec, WM8753_SRATE2) & 0x003f;
+               snd_soc_write(codec, WM8753_SRATE2, reg | div);
                break;
        default:
                return -EINVAL;
@@ -1050,7 +1003,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
                unsigned int fmt)
 {
        struct snd_soc_codec *codec = codec_dai->codec;
-       u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01e0;
+       u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01e0;
 
        /* interface format */
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -1072,7 +1025,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
        return 0;
 }
 
@@ -1085,8 +1038,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
        struct snd_soc_codec *codec = codec_dai->codec;
        u16 ioctl, hifi;
 
-       hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x011f;
-       ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x00ae;
+       hifi = snd_soc_read(codec, WM8753_HIFI) & 0x011f;
+       ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x00ae;
 
        /* set master/slave audio interface */
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1140,8 +1093,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
-       wm8753_write(codec, WM8753_IOCTL, ioctl);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_IOCTL, ioctl);
        return 0;
 }
 
@@ -1162,8 +1115,8 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
        struct snd_soc_pcm_runtime *rtd = substream->private_data;
        struct snd_soc_codec *codec = rtd->codec;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x01c0;
-       u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01f3;
+       u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x01c0;
+       u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01f3;
        int coeff;
 
        /* is digital filter coefficient valid ? */
@@ -1172,7 +1125,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
                printk(KERN_ERR "wm8753 invalid MCLK or rate\n");
                return coeff;
        }
-       wm8753_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) |
+       snd_soc_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) |
                coeff_div[coeff].usb);
 
        /* bit size */
@@ -1190,7 +1143,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
                break;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
        return 0;
 }
 
@@ -1201,8 +1154,8 @@ static int wm8753_mode1v_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as pcmclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock);
 
        if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1224,8 +1177,8 @@ static int wm8753_mode2_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as pcmclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock);
 
        if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1239,8 +1192,8 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as mclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock | 0x4);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock | 0x4);
 
        if (wm8753_hdac_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1252,19 +1205,19 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
 static int wm8753_mute(struct snd_soc_dai *dai, int mute)
 {
        struct snd_soc_codec *codec = dai->codec;
-       u16 mute_reg = wm8753_read_reg_cache(codec, WM8753_DAC) & 0xfff7;
+       u16 mute_reg = snd_soc_read(codec, WM8753_DAC) & 0xfff7;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
        /* the digital mute covers the HiFi and Voice DAC's on the WM8753.
         * make sure we check if they are not both active when we mute */
        if (mute && wm8753->dai_func == 1) {
                if (!codec->active)
-                       wm8753_write(codec, WM8753_DAC, mute_reg | 0x8);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
        } else {
                if (mute)
-                       wm8753_write(codec, WM8753_DAC, mute_reg | 0x8);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
                else
-                       wm8753_write(codec, WM8753_DAC, mute_reg);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg);
        }
 
        return 0;
@@ -1273,23 +1226,23 @@ static int wm8753_mute(struct snd_soc_dai *dai, int mute)
 static int wm8753_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
-       u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e;
+       u16 pwr_reg = snd_soc_read(codec, WM8753_PWR1) & 0xfe3e;
 
        switch (level) {
        case SND_SOC_BIAS_ON:
                /* set vmid to 50k and unmute dac */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x00c0);
                break;
        case SND_SOC_BIAS_PREPARE:
                /* set vmid to 5k for quick power up */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x01c1);
                break;
        case SND_SOC_BIAS_STANDBY:
                /* mute dac and set vmid to 500k, enable VREF */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x0141);
                break;
        case SND_SOC_BIAS_OFF:
-               wm8753_write(codec, WM8753_PWR1, 0x0001);
+               snd_soc_write(codec, WM8753_PWR1, 0x0001);
                break;
        }
        codec->bias_level = level;
@@ -1477,7 +1430,7 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
                else
                        dai->driver = &wm8753_all_dai[(wm8753->dai_func << 1) + 1];
        }
-       wm8753_write(codec, WM8753_IOCTL, wm8753->dai_func);
+       snd_soc_write(codec, WM8753_IOCTL, wm8753->dai_func);
 }
 
 static void wm8753_work(struct work_struct *work)
@@ -1495,22 +1448,19 @@ static int wm8753_suspend(struct snd_soc_codec *codec, pm_message_t state)
 
 static int wm8753_resume(struct snd_soc_codec *codec)
 {
+       u16 *reg_cache = codec->reg_cache;
        int i;
-       u8 data[2];
-       u16 *cache = codec->reg_cache;
 
        /* Sync reg_cache with the hardware */
-       for (i = 0; i < ARRAY_SIZE(wm8753_reg); i++) {
-               if (i + 1 == WM8753_RESET)
+       for (i = 1; i < ARRAY_SIZE(wm8753_reg); i++) {
+               if (i == WM8753_RESET)
                        continue;
 
                /* No point in writing hardware default values back */
-               if (cache[i] == wm8753_reg[i])
+               if (reg_cache[i] == wm8753_reg[i])
                        continue;
 
-               data[0] = ((i + 1) << 1) | ((cache[i] >> 8) & 0x0001);
-               data[1] = cache[i] & 0x00ff;
-               codec->hw_write(codec->control_data, data, 2);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
@@ -1548,7 +1498,7 @@ static int run_delayed_work(struct delayed_work *dwork)
 static int wm8753_probe(struct snd_soc_codec *codec)
 {
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       int ret = 0, reg;
+       int ret;
 
        INIT_DELAYED_WORK(&codec->delayed_work, wm8753_work);
 
@@ -1573,26 +1523,16 @@ static int wm8753_probe(struct snd_soc_codec *codec)
                              msecs_to_jiffies(caps_charge));
 
        /* set the update bits */
-       reg = wm8753_read_reg_cache(codec, WM8753_LDAC);
-       wm8753_write(codec, WM8753_LDAC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RDAC);
-       wm8753_write(codec, WM8753_RDAC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LADC);
-       wm8753_write(codec, WM8753_LADC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RADC);
-       wm8753_write(codec, WM8753_RADC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LOUT1V);
-       wm8753_write(codec, WM8753_LOUT1V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_ROUT1V);
-       wm8753_write(codec, WM8753_ROUT1V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LOUT2V);
-       wm8753_write(codec, WM8753_LOUT2V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_ROUT2V);
-       wm8753_write(codec, WM8753_ROUT2V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LINVOL);
-       wm8753_write(codec, WM8753_LINVOL, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RINVOL);
-       wm8753_write(codec, WM8753_RINVOL, reg | 0x0100);
+       snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LADC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RADC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LOUT1V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_ROUT1V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LOUT2V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_ROUT2V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LINVOL, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RINVOL, 0x0100, 0x0100);
 
        snd_soc_add_controls(codec, wm8753_snd_controls,
                             ARRAY_SIZE(wm8753_snd_controls));
index 9001cc48ba1371596c890ce12465e314d9022a81..1ec12eff06205f9023b57c5da7b836d6e81a1f4b 100644 (file)
@@ -50,8 +50,6 @@ static const char *wm8904_supply_names[WM8904_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8904_priv {
 
-       u16 reg_cache[WM8904_MAX_REGISTER + 1];
-
        enum wm8904_type devtype;
        void *control_data;
 
@@ -2094,7 +2092,7 @@ static int wm8904_digital_mute(struct snd_soc_dai *codec_dai, int mute)
 
 static void wm8904_sync_cache(struct snd_soc_codec *codec)
 {
-       struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -2105,14 +2103,14 @@ static void wm8904_sync_cache(struct snd_soc_codec *codec)
        /* Sync back cached values if they're different from the
         * hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(wm8904->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (!wm8904_access[i].writable)
                        continue;
 
-               if (wm8904->reg_cache[i] == wm8904_reg[i])
+               if (reg_cache[i] == wm8904_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, wm8904->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -2371,6 +2369,7 @@ static int wm8904_probe(struct snd_soc_codec *codec)
 {
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
        struct wm8904_pdata *pdata = wm8904->pdata;
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        codec->cache_sync = 1;
@@ -2437,19 +2436,19 @@ static int wm8904_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU;
-       wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU;
-       wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU;
-       wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU |
+       reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU;
+       reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU;
+       reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU;
+       reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU;
+       reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU |
                WM8904_HPOUTLZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU |
                WM8904_HPOUTRZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU |
                WM8904_LINEOUTLZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU |
                WM8904_LINEOUTRZC;
-       wm8904->reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE;
+       reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE;
 
        /* Apply configuration from the platform data. */
        if (wm8904->pdata) {
@@ -2457,23 +2456,23 @@ static int wm8904_probe(struct snd_soc_codec *codec)
                        if (!pdata->gpio_cfg[i])
                                continue;
 
-                       wm8904->reg_cache[WM8904_GPIO_CONTROL_1 + i]
+                       reg_cache[WM8904_GPIO_CONTROL_1 + i]
                                = pdata->gpio_cfg[i] & 0xffff;
                }
 
                /* Zero is the default value for these anyway */
                for (i = 0; i < WM8904_MIC_REGS; i++)
-                       wm8904->reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i]
+                       reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i]
                                = pdata->mic_cfg[i];
        }
 
        /* Set Class W by default - this will be managed by the Class
         * G widget at runtime where bypass paths are available.
         */
-       wm8904->reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR;
+       reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR;
 
        /* Use normal bias source */
-       wm8904->reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL;
+       reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL;
 
        wm8904_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
index 2cb16f895c4607c9d323ea3cbb9d8e31c4741cb1..23086e2c976abf686c62624da2fd00ff59417594 100644 (file)
@@ -768,6 +768,7 @@ static __devinit int wm8940_i2c_probe(struct i2c_client *i2c,
 
        i2c_set_clientdata(i2c, wm8940);
        wm8940->control_data = i2c;
+       wm8940->control_type = SND_SOC_I2C;
 
        ret = snd_soc_register_codec(&i2c->dev,
                        &soc_codec_dev_wm8940, &wm8940_dai, 1);
index 9cbab8e1de0149cd8b2063406ecbae29ca0c78e5..2ac35b0be86acb37faaabdb1c0f81b321a2bb741 100644 (file)
@@ -42,8 +42,6 @@ static const char *wm8955_supply_names[WM8955_NUM_SUPPLIES] = {
 struct wm8955_priv {
        enum snd_soc_control_type control_type;
 
-       u16 reg_cache[WM8955_MAX_REGISTER + 1];
-
        unsigned int mclk_rate;
 
        int deemph;
@@ -768,6 +766,7 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        switch (level) {
@@ -800,14 +799,14 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
                        /* Sync back cached values if they're
                         * different from the hardware default.
                         */
-                       for (i = 0; i < ARRAY_SIZE(wm8955->reg_cache); i++) {
+                       for (i = 0; i < codec->driver->reg_cache_size; i++) {
                                if (i == WM8955_RESET)
                                        continue;
 
-                               if (wm8955->reg_cache[i] == wm8955_reg[i])
+                               if (reg_cache[i] == wm8955_reg[i])
                                        continue;
 
-                               snd_soc_write(codec, i, wm8955->reg_cache[i]);
+                               snd_soc_write(codec, i, reg_cache[i]);
                        }
 
                        /* Enable VREF and VMID */
@@ -902,6 +901,7 @@ static int wm8955_probe(struct snd_soc_codec *codec)
 {
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
        struct wm8955_pdata *pdata = dev_get_platdata(codec->dev);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8955->control_type);
@@ -934,25 +934,25 @@ static int wm8955_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8955->reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU;
-       wm8955->reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU;
-       wm8955->reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC;
-       wm8955->reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC;
-       wm8955->reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC;
-       wm8955->reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC;
-       wm8955->reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC;
+       reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU;
+       reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU;
+       reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC;
+       reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC;
+       reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC;
+       reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC;
+       reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC;
 
        /* Also enable adaptive bass boost by default */
-       wm8955->reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB;
+       reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB;
 
        /* Set platform data values */
        if (pdata) {
                if (pdata->out2_speaker)
-                       wm8955->reg_cache[WM8955_ADDITIONAL_CONTROL_2]
+                       reg_cache[WM8955_ADDITIONAL_CONTROL_2]
                                |= WM8955_ROUT2INV;
 
                if (pdata->monoin_diff)
-                       wm8955->reg_cache[WM8955_MONO_OUT_MIX_1]
+                       reg_cache[WM8955_MONO_OUT_MIX_1]
                                |= WM8955_DMEN;
        }
 
@@ -1003,6 +1003,7 @@ static __devinit int wm8955_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm8955);
+       wm8955->control_type = SND_SOC_I2C;
 
        ret = snd_soc_register_codec(&i2c->dev,
                        &soc_codec_dev_wm8955, &wm8955_dai, 1);
index 21986c42272f07ac693cd21d15f48cff20c43352..ff6ff2f529d2a97f9e2522ceff5df18355cecd5c 100644 (file)
@@ -1013,6 +1013,7 @@ static __devinit int wm8960_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm8960);
+       wm8960->control_type = SND_SOC_I2C;
        wm8960->control_data = i2c;
 
        ret = snd_soc_register_codec(&i2c->dev,
index 1304ca91a11c708c567f69c45052c95f1f7be17e..7c421cc837bd3d697969b6f0c0f94f0ecb7abc5f 100644 (file)
@@ -52,8 +52,6 @@ static const char *wm8962_supply_names[WM8962_NUM_SUPPLIES] = {
 struct wm8962_priv {
        struct snd_soc_codec *codec;
 
-       u16 reg_cache[WM8962_MAX_REGISTER + 1];
-
        int sysclk;
        int sysclk_rate;
 
@@ -1991,8 +1989,7 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol,
                            struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        /* Apply the update (if any) */
@@ -2020,8 +2017,7 @@ static int wm8962_put_spk_sw(struct snd_kcontrol *kcontrol,
                            struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        /* Apply the update (if any) */
@@ -2329,8 +2325,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
                         struct snd_kcontrol *kcontrol, int event)
 {
        struct snd_soc_codec *codec = w->codec;
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int reg;
 
        switch (w->shift) {
@@ -2719,7 +2714,7 @@ static int wm8962_add_widgets(struct snd_soc_codec *codec)
 
 static void wm8962_sync_cache(struct snd_soc_codec *codec)
 {
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -2732,13 +2727,13 @@ static void wm8962_sync_cache(struct snd_soc_codec *codec)
        /* Sync back cached values if they're different from the
         * hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (i == WM8962_SOFTWARE_RESET)
                        continue;
-               if (wm8962->reg_cache[i] == wm8962_reg[i])
+               if (reg_cache[i] == wm8962_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, wm8962->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -3406,12 +3401,11 @@ EXPORT_SYMBOL_GPL(wm8962_mic_detect);
 #ifdef CONFIG_PM
 static int wm8962_resume(struct snd_soc_codec *codec)
 {
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
        u16 *reg_cache = codec->reg_cache;
        int i;
 
        /* Restore the registers */
-       for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                switch (i) {
                case WM8962_SOFTWARE_RESET:
                        continue;
@@ -3705,6 +3699,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
        struct wm8962_pdata *pdata = dev_get_platdata(codec->dev);
        struct i2c_client *i2c = container_of(codec->dev, struct i2c_client,
                                              dev);
+       u16 *reg_cache = codec->reg_cache;
        int i, trigger, irq_pol;
 
        wm8962->codec = codec;
@@ -3804,7 +3799,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
 
                /* Put the speakers into mono mode? */
                if (pdata->spk_mono)
-                       wm8962->reg_cache[WM8962_CLASS_D_CONTROL_2]
+                       reg_cache[WM8962_CLASS_D_CONTROL_2]
                                |= WM8962_SPK_MONO;
 
                /* Micbias setup, detection enable and detection
@@ -3819,16 +3814,16 @@ static int wm8962_probe(struct snd_soc_codec *codec)
        }
 
        /* Latch volume update bits */
-       wm8962->reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU;
-       wm8962->reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU;
-       wm8962->reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU;
-       wm8962->reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU;    
-       wm8962->reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU;
-       wm8962->reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU;
-       wm8962->reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU;
-       wm8962->reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU;
-       wm8962->reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU;
-       wm8962->reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU;
+       reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU;
+       reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU;
+       reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU;
+       reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU;
+       reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU;
+       reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU;
+       reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU;
+       reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU;
+       reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU;
+       reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU;
 
        wm8962_add_widgets(codec);
 
index 63f6dbf5d07021887084e57541591bf1fa570fb3..9f18db6e167c0c86924b08e434d671bea98c8ff1 100644 (file)
@@ -718,6 +718,7 @@ static __devinit int wm8971_i2c_probe(struct i2c_client *i2c,
        if (wm8971 == NULL)
                return -ENOMEM;
 
+       wm8971->control_type = SND_SOC_I2C;
        i2c_set_clientdata(i2c, wm8971);
 
        ret = snd_soc_register_codec(&i2c->dev,
index ecc7c37180c7ad2158f11b9de1579bde332cb44d..a486670966bd7e6a44470ece6918fab24c638c2b 100644 (file)
@@ -1335,6 +1335,7 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm9081);
+       wm9081->control_type = SND_SOC_I2C;
        wm9081->control_data = i2c;
 
        ret = snd_soc_register_codec(&i2c->dev,
index 99c046ba46bb6ed637d356579a2f4af9d1841bc3..6e5f64f627cb82e6c7379ab389ec0a10e45d9b0a 100644 (file)
@@ -141,7 +141,6 @@ static const u16 wm9090_reg_defaults[] = {
 /* This struct is used to save the context */
 struct wm9090_priv {
        struct mutex mutex;
-       u16 reg_cache[WM9090_MAX_REGISTER + 1];
        struct wm9090_platform_data pdata;
        void *control_data;
 };
@@ -552,6 +551,7 @@ static int wm9090_set_bias_level(struct snd_soc_codec *codec,
 static int wm9090_probe(struct snd_soc_codec *codec)
 {
        struct wm9090_priv *wm9090 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        codec->control_data = wm9090->control_data;
@@ -576,22 +576,22 @@ static int wm9090_probe(struct snd_soc_codec *codec)
        /* Configure some defaults; they will be written out when we
         * bring the bias up.
         */
-       wm9090->reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU
+       reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU
                | WM9090_IN1A_ZC;
-       wm9090->reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU
+       reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU
                | WM9090_IN1B_ZC;
-       wm9090->reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU
+       reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU
                | WM9090_IN2A_ZC;
-       wm9090->reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU
+       reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU
                | WM9090_IN2B_ZC;
-       wm9090->reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |=
+       reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |=
                WM9090_SPKOUT_VU | WM9090_SPKOUTL_ZC;
-       wm9090->reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |=
+       reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |=
                WM9090_HPOUT1_VU | WM9090_HPOUT1L_ZC;
-       wm9090->reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |=
+       reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |=
                WM9090_HPOUT1_VU | WM9090_HPOUT1R_ZC;
 
-       wm9090->reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA;
+       reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA;
 
        wm9090_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
index b2c63309a65165b471822e99268c828bbdb07777..6f5a498608b292241e93dc9c498e8ce5f6a683cc 100644 (file)
@@ -24,12 +24,47 @@ OPTIONS
 --input=::
         Input file name. (default: perf.data)
 
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface. Use of --tui requires a tty; if one is not
        present, as when piping to other commands, the stdio interface is
        used. This interface starts by centering on the line with more
-       samples, TAB/UNTAB cycles thru the lines with more samples.
+       samples, TAB/UNTAB cycles through the lines with more samples.
 
 SEE ALSO
 --------
index 01b642c0bf8f974aedf2560df4e91cc5dd514a37..5eaac6f26d51e861236cfa48ab7c0509107b472c 100644 (file)
@@ -18,6 +18,9 @@ perf report.
 
 OPTIONS
 -------
+-H::
+--with-hits::
+        Show only DSOs with hits.
 -i::
 --input=::
         Input file name. (default: perf.data)
index 20d97d84ea1c37164005a4f38b8af5e23104ef93..74d7481ed7a6916f8797ed67b91b5c5d80382edc 100644 (file)
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
 
 OPTIONS
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, so that it is the only non-valid separator.
 
 -v::
@@ -50,6 +62,13 @@ OPTIONS
        Be verbose, for instance, show the raw counts in addition to the
        diff.
 
+-f::
+--force::
+       Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index d004e19fe6d6ffcc9c9d8e25822d251d8dda9c6e..dd84cb2f0a8861dd8656b4058ae8ed906f17169c 100644 (file)
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
   a performance counter profile of guest os in realtime
   of an arbitrary workload.
 
-  'perf kvm record <command>' to record the performance couinter profile
+  'perf kvm record <command>' to record the performance counter profile
   of an arbitrary workload and save it into a perf data file. If both
   --host and --guest are input, the perf data file name is perf.data.kvm.
   If there is  no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
 
 OPTIONS
 -------
+-i::
+--input=::
+        Input file name.
+-o::
+--output::
+        Output file name.
 --host=::
         Collect host side performance profile.
 --guest=::
index b317102138c82f2a78a58ada898888603f8cc802..921de259ea1086f36ce5b7e6a3426cec5618deba 100644 (file)
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
 
   'perf lock report' reports statistical data.
 
+OPTIONS
+-------
+
+-i::
+--input=<file>::
+        Input file name.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 62de1b7f4e760367337042c52760e7a49ded50c7..86b797a35aa6acae540b652345937991257daa4d 100644 (file)
@@ -115,9 +115,9 @@ Each probe argument follows below syntax.
 
 LINE SYNTAX
 -----------
-Line range is descripted by following syntax.
+Line range is described by following syntax.
 
- "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
 
 FUNC specifies the function name whose lines are shown. 'RLN' is the start line
 number from function entry line, and 'RLN2' is the end line number. As same as
index a91f9f9e6e5c27f96623fd10061f12a3041926b6..52462ae26455c264aa83130c7bc50c9ca97807cc 100644 (file)
@@ -39,15 +39,24 @@ OPTIONS
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           If you want to profile read-write accesses in 0x1000, just set
           'mem:0x1000:rw'.
+
+--filter=<filter>::
+        Event filter.
+
 -a::
-        System-wide collection.
+--all-cpus::
+        System-wide collection from all CPUs.
 
 -l::
         Scale counter values.
 
 -p::
 --pid=::
-       Record events on existing pid.
+       Record events on existing process ID.
+
+-t::
+--tid=::
+        Record events on existing thread ID.
 
 -r::
 --realtime=::
@@ -99,6 +108,11 @@ OPTIONS
 --data::
        Sample addresses.
 
+-T::
+--timestamp::
+       Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+       for instance.
+
 -n::
 --no-samples::
        Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
 
 -C::
 --cpu::
-Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode with inheritance mode on (default), samples are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
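+
+For example (a hypothetical invocation following the syntax above), to
+sample system-wide but only on CPUs 0 and 1 while a workload runs:
+
+  # perf record -C 0,1 -a sleep 10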
 
index 12052c9ed0babfc3a1c93cc01758ec3b7747ee10..8ba03d6e5398d8387b11f9caf183bed81a0eb5a2 100644 (file)
@@ -20,6 +20,11 @@ OPTIONS
 -i::
 --input=::
         Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
 -n::
 --show-nr-samples::
        Show the number of samples for each symbol
+
+--showcpuutilization::
+        Show sample percentage for different cpu modes.
+
 -T::
 --threads::
        Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
        Only consider these symbols. CSV that understands
        file://filename entries.
 
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
 -s::
 --sort=::
        Sort by key(s): pid, comm, dso, symbol, parent.
 
+-p::
+--parent=<regex>::
+        Regex filter to identify the parent; see '--sort parent'.
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
 -w::
---field-width=::
+--column-widths=<width[,width...]>::
        Force each column width to the provided list, for large terminal
        readability.
 
@@ -52,19 +73,26 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, so that it is the only non-valid separator.
 
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 -g [type,min]::
 --call-graph::
-        Display callchains using type and min percent threshold.
+        Display call chains using type and min percent threshold.
        type can be either:
-       - flat: single column, linear exposure of callchains.
+       - flat: single column, linear exposure of call chains.
        - graph: use a graph tree, displaying absolute overhead rates.
        - fractal: like graph, but displays relative rates. Each branch of
                 the tree is considered as a new profiled object. +
        Default: fractal,0.5.
 
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
 --stdio:: Use the stdio interface.
 
+--tui:: Use the TUI interface, which is integrated with annotate and allows
@@ -72,6 +100,25 @@ OPTIONS
        requires a tty; if one is not present, as when piping to other
        commands, the stdio interface is used.
 
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
index 8417644a6166b9fcd071b88eebb9dd46f00b8a1f..46822d5fde1c0328ca8af0f7687265adeeca7f0e 100644 (file)
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
 SYNOPSIS
 --------
 [verse]
-'perf sched' {record|latency|replay|trace}
+'perf sched' {record|latency|map|replay|trace}
 
 DESCRIPTION
 -----------
-There are four variants of perf sched:
+There are five variants of perf sched:
 
   'perf sched record <command>' to record the scheduling events
   of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
   of the workload as it occurred when it was recorded - and can repeat
   it a number of times, measuring its performance.)
 
+  'perf sched map' to print a textual context-switching outline of
+  workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
 OPTIONS
 -------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
 -D::
 --dump-raw-trace=::
         Display verbose dump of the sched data.
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
new file mode 100644 (file)
index 0000000..5bb41e5
--- /dev/null
@@ -0,0 +1,217 @@
+perf-script-perl(1)
+===================
+
+NAME
+----
+perf-script-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Perl]:script[.pl] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Perl interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf script
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-script.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+        field:int common_lock_depth;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+   my ($event_name, $context, $common_cpu, $common_secs,
+       $common_nsecs, $common_pid, $common_comm,
+       $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name               the name of the event as text
+ $context                  an opaque 'cookie' used in calls back into perf
+ $common_cpu               the cpu the event occurred on
+ $common_secs              the secs portion of the event timestamp
+ $common_nsecs             the nsecs portion of the event timestamp
+ $common_pid               the pid of the current task
+ $common_comm              the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
+ use lib "./perf-script-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it.  The standard set
+ of common arguments is passed into it:
+
+----
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs,
+        $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules.  To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf script script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
+  symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
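+
+As a brief, hypothetical sketch (the event and its 'vec' field are only
+examples, not taken from this document), a handler might use
+symbol_str() to print a field's symbolic name instead of its raw value:
+
+----
+ sub irq::softirq_entry
+ {
+     my ($event_name, $context, $common_cpu, $common_secs,
+         $common_nsecs, $common_pid, $common_comm, $vec) = @_;
+
+     # Translate the raw 'vec' value using the strings parsed from the
+     # event's 'print fmt' field.
+     print symbol_str($event_name, "vec", $vec), "\n";
+ }
+----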
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt_count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
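+
+For instance, a minimal, hypothetical handler (assuming the common
+argument layout described earlier) could call these from any event:
+
+----
+ sub sched::sched_switch
+ {
+     my ($event_name, $context) = @_;
+
+     # $context is always the second argument passed to a handler.
+     printf("pc=%d flags=%d lock_depth=%d\n",
+            common_pc($context), common_flags($context),
+            common_lock_depth($context));
+ }
+----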
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs($nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+  nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+  avg($total, $n) - returns average given a sum and a total number of values
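+
+A short, hypothetical helper combining these functions to format an
+event timestamp:
+
+----
+ sub print_ts
+ {
+     my ($common_secs, $common_nsecs) = @_;
+
+     # Fold the secs/nsecs pair into total nsecs, then render it as a
+     # printable "secs.nsecs" string.
+     my $t = nsecs($common_secs, $common_nsecs);
+     printf("event at %s\n", nsecs_str($t));
+ }
+----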
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
new file mode 100644 (file)
index 0000000..36b3827
--- /dev/null
@@ -0,0 +1,623 @@
+perf-script-python(1)
+=====================
+
+NAME
+----
+perf-script-python - Process trace data with a Python script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Python]:script[.py] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Python interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Python script, if any.
+
+A QUICK EXAMPLE
+---------------
+
+This section shows the process, start to finish, of creating a working
+Python script that aggregates and extracts useful information from a
+raw perf script stream.  You can avoid reading the rest of this
+document if an example is enough for you; the rest of the document
+provides more details on each step and lists the library functions
+available to script writers.
+
+This example actually details the steps that were used to create the
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
+scripts listed by that command.
+
+The syscall-counts script is a simple script, but demonstrates all the
+basic ideas necessary to create a useful script.  Here's an example
+of its output (syscall names are not yet supported, they will appear
+as numbers):
+
+----
+syscall events:
+
+event                                          count
+----------------------------------------  -----------
+sys_write                                     455067
+sys_getdents                                    4072
+sys_close                                       3037
+sys_swapoff                                     1769
+sys_read                                         923
+sys_sched_setparam                               826
+sys_open                                         331
+sys_newfstat                                     326
+sys_mmap                                         217
+sys_munmap                                       216
+sys_futex                                        141
+sys_select                                       102
+sys_poll                                          84
+sys_setitimer                                     12
+sys_writev                                         8
+15                                                 8
+sys_lseek                                          7
+sys_rt_sigprocmask                                 6
+sys_wait4                                          3
+sys_ioctl                                          3
+sys_set_robust_list                                1
+sys_exit                                           1
+56                                                 1
+sys_access                                         1
+----
+
+Basically our task is to keep a per-syscall tally that gets updated
+every time a system call occurs in the system.  Our script will do
+that, but first we need to record the data that will be processed by
+that script.  Theoretically, there are a couple of ways we could do
+that:
+
+- we could enable every event under the tracing/events/syscalls
+  directory, but this is over 600 syscalls, well beyond the number
+  allowable by perf.  These individual syscall events will however be
+  useful if we want to later use the guidance we get from the
+  general-purpose scripts to drill down and get more detail about
+  individual syscalls of interest.
+
+- we can enable the sys_enter and/or sys_exit syscalls found under
+  tracing/events/raw_syscalls.  These are called for all syscalls; the
+  'id' field can be used to distinguish between individual syscall
+  numbers.
+
+For this script, we only need to know that a syscall was entered; we
+don't care how it exited, so we'll use 'perf record' to record only
+the sys_enter events:
+
+----
+# perf record -a -e raw_syscalls:sys_enter
+
+^C[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
+----
+
+The options basically say to collect data for every syscall event
+system-wide and multiplex the per-cpu output into a single stream.
+That single stream will be recorded in a file in the current directory
+called perf.data.
+
+Once we have a perf.data file containing our data, we can use the -g
+'perf script' option to generate a Python script that will contain a
+callback handler for each event type found in the perf.data trace
+stream (for more details, see the STARTER SCRIPTS section).
+
+----
+# perf script -g python
+generated Python script: perf-script.py
+----
+
+The output file, also created in the current directory, is named
+perf-script.py.  Here's the file in its entirety:
+
+----
+# perf script event handlers, generated by perf script -g python
+# Licensed under the terms of the GNU GPL License version 2
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the format files.  Those fields not available as handler params can
+# be retrieved using Python functions of the form common_*(context).
+# See the perf-script-python Documentation for the list of available functions.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_begin():
+       print "in trace_begin"
+
+def trace_end():
+       print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+       common_secs, common_nsecs, common_pid, common_comm,
+       id, args):
+               print_header(event_name, common_cpu, common_secs, common_nsecs,
+                       common_pid, common_comm)
+
+               print "id=%d, args=%s\n" % \
+               (id, args),
+
+def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
+               common_pid, common_comm):
+               print_header(event_name, common_cpu, common_secs, common_nsecs,
+               common_pid, common_comm)
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+       print "%-20s %5u %05u.%09u %8u %-20s " % \
+       (event_name, cpu, secs, nsecs, pid, comm),
+----
+
+At the top is a comment block followed by some import statements and a
+path append which every perf script script should include.
+
+Following that are a couple of generated functions, trace_begin() and
+trace_end(), which are called at the beginning and the end of the
+script respectively (for more details, see the SCRIPT LAYOUT section
+below).
+
+Following those are the 'event handler' functions, generated one for
+every event in the 'perf record' output.  The handler functions take
+the form subsystem__event_name, and contain named parameters, one for
+each field in the event; in this case, there's only one event,
+raw_syscalls__sys_enter().  (see the EVENT HANDLERS section below for
+more info on event handlers).
+
+The final couple of functions are, like the begin and end functions,
+generated for every script.  The first, trace_unhandled(), is called
+every time the script finds an event in the perf.data file that
+doesn't correspond to any event handler in the script.  This could
+mean either that the record step recorded event types that it wasn't
+really interested in, or the script was run against a trace file that
+doesn't correspond to the script.
+
+The script generated by the -g option simply prints a line for each
+event found in the trace stream, i.e. it basically just dumps the event
+and its parameter values to stdout.  The print_header() function is
+simply a utility function used for that purpose.  Let's rename the
+script and run it to see the default output:
+
+----
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
+
+raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847620860     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847710478     6533 npviewer.bin          id=78, args=
+raw_syscalls__sys_enter     1 00840.847719204     6533 npviewer.bin          id=142, args=
+raw_syscalls__sys_enter     1 00840.847755445     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847775601     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847781820     6533 npviewer.bin          id=3, args=
+.
+.
+.
+----
+
+Of course, for this script, we're not interested in printing every
+trace event, but rather aggregating it in a useful way.  So we'll get
+rid of everything to do with printing as well as the trace_begin() and
+trace_unhandled() functions, which we won't be using.  That leaves us
+with this minimalistic skeleton:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_end():
+       print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+       common_secs, common_nsecs, common_pid, common_comm,
+       id, args):
+----
+
+In trace_end(), we'll simply print the results, but first we need to
+generate some results to print.  To do that we need to have our
+sys_enter() handler do the necessary tallying until all events have
+been counted.  A hash table indexed by syscall id is a good way to
+store that information; every time the sys_enter() handler is called,
+we simply increment a count associated with that hash entry indexed by
+that syscall id:
+
+----
+  syscalls = autodict()
+
+  try:
+    syscalls[id] += 1
+  except TypeError:
+    syscalls[id] = 1
+----
+
+The syscalls 'autodict' object is a special kind of Python dictionary
+(implemented in Core.py) that implements Perl's 'autovivifying' hashes
+in Python, i.e. with autovivifying hashes you can assign nested hash
+values without having to go to the trouble of creating intermediate
+levels if they don't exist, e.g. syscalls[comm][pid][id] = 1 will create
+the intermediate hash levels and finally assign the value 1 to the
+hash entry for 'id'.  (Because the value being assigned isn't a hash
+object itself, the initial value is assigned in the TypeError
+exception handler.  There may be a better way to do this in Python,
+but that's what works for now.)
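+
+For illustration only, an equivalent autovivifying dictionary can be
+built from the Python standard library; this is an assumption about
+Core.py's behaviour, not a copy of its code:
+
+----
+import collections
+
+def autodict():
+    # Every missing key yields another autodict, so nested assignments
+    # like d[a][b][c] = 1 create the intermediate levels on demand.
+    return collections.defaultdict(autodict)
+
+d = autodict()
+d['comm'][1234]['id'] = 1     # intermediate levels appear automatically
+
+counts = autodict()
+try:
+    counts['id'] += 1         # first increment hits a non-int default...
+except TypeError:
+    counts['id'] = 1          # ...so initialize it here, as in the text
+----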
+
+Putting that code into the raw_syscalls__sys_enter() handler, we
+effectively end up with a single-level dictionary keyed on syscall id
+and having the counts we've tallied as values.
+
+The print_syscall_totals() function iterates over the entries in the
+dictionary and displays a line for each entry containing the syscall
+name (the dictionary keys contain the syscall ids, which are passed to
+the Util function syscall_name(), which translates the raw syscall
+numbers to the corresponding syscall name strings).  The output is
+displayed after all the events in the trace have been processed, by
+calling the print_syscall_totals() function from the trace_end()
+handler called at the end of script processing.
+
+The final script producing the output shown above is shown in its
+entirety below (syscall_name() helper is not yet available, you can
+only deal with id's for now):
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+syscalls = autodict()
+
+# 'for_comm' is referenced by print_syscall_totals(); define it here so
+# the script runs as-is (set it to a process name to filter by comm).
+for_comm = None
+
+def trace_end():
+       print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+       common_secs, common_nsecs, common_pid, common_comm,
+       id, args):
+       try:
+               syscalls[id] += 1
+       except TypeError:
+               syscalls[id] = 1
+
+def print_syscall_totals():
+    if for_comm is not None:
+           print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+           print "\nsyscall events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+                                 reverse = True):
+           print "%-40s  %10d\n" % (syscall_name(id), val),
+----
+
+The script can be run just as before:
+
+  # perf script -s syscall-counts.py
+
+So those are the essential steps in writing and running a script.  The
+process can be generalized to any tracepoint or set of tracepoints
+you're interested in - basically find the tracepoint(s) you're
+interested in by looking at the list of available events shown by
+'perf list' and/or look in /sys/kernel/debug/tracing/events for
+detailed event and field info, record the corresponding trace data
+using 'perf record', passing it the list of interesting events,
+generate a skeleton script using 'perf script -g python' and modify the
+code to aggregate and display it for your particular needs.
+
+After you've done that you may end up with a general-purpose script
+that you want to keep around and have available for future use.  By
+writing a couple of very simple shell scripts and putting them in the
+right place, you can have your script listed alongside the other
+scripts listed by the 'perf script -l' command e.g.:
+
+----
+root@tropicana:~# perf script -l
+List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+----
+
+A nice side effect of doing this is that you also then capture the
+potentially lengthy 'perf record' command needed to record the events
+for the script.
+
+To have the script appear as a 'built-in' script, you write two simple
+scripts, one for recording and one for 'reporting'.
+
+The 'record' script is a shell script with the same base name as your
+script, but with -record appended.  The shell script should be put
+into the perf/scripts/python/bin directory in the kernel source tree.
+In that script, you write the 'perf record' command-line needed for
+your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
+
+#!/bin/bash
+perf record -a -e raw_syscalls:sys_enter
+----
+
+The 'report' script is also a shell script with the same base name as
+your script, but with -report appended.  It should also be located in
+the perf/scripts/python/bin directory.  In that script, you write the
+'perf script -s' command-line needed for running your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
+
+#!/bin/bash
+# description: system-wide syscall counts
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+----
+
+Note that the location of the Python script given in the shell script
+is in the libexec/perf-core/scripts/python directory - this is where
+the script will be copied by 'make install' when you install perf.
+For 'make install' to put your script there, your script needs to be
+located in the perf/scripts/python directory in the kernel source
+tree:
+
+----
+# ls -al kernel-source/tools/perf/scripts/python
+
+root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
+total 32
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
+drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
+-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
+----
+
+Once you've done that (don't forget to do a new 'make install',
+otherwise your script won't show up at run-time), 'perf script -l'
+should show a new entry for your script:
+
+----
+root@tropicana:~# perf script -l
+List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+  syscall-counts                       system-wide syscall counts
+----
+
+You can now perform the record step via 'perf script record':
+
+  # perf script record syscall-counts
+
+and display the output using 'perf script report':
+
+  # perf script report syscall-counts
+
+STARTER SCRIPTS
+---------------
+
+You can quickly get started writing a script for a particular set of
+trace data by generating a skeleton script using 'perf script -g
+python' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
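+
+For example, with a perf.data file in the current directory, the
+generation step looks like:
+
+----
+# perf script -g python
+generated Python script: perf-script.py
+----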
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/python for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-script.py script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+        field:int common_lock_depth;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+       common_nsecs, common_pid, common_comm,
+       comm, pid, prio, success, target_cpu):
+       pass
+----
+
+The handler function takes the form subsystem__event_name.
+
+The common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ event_name                the name of the event as text
+ context                   an opaque 'cookie' used in calls back into perf
+ common_cpu                the cpu the event occurred on
+ common_secs               the secs portion of the event timestamp
+ common_nsecs              the nsecs portion of the event timestamp
+ common_pid                the pid of the current task
+ common_comm               the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
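+For instance, a minimal (hypothetical) script that just prints a line
+for each wakeup using those handler arguments could look like:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+       common_nsecs, common_pid, common_comm,
+       comm, pid, prio, success, target_cpu):
+       print "%s woke up %s (pid %d) on cpu %d\n" % \
+               (common_comm, comm, pid, target_cpu),
+----
+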
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Python script should start by setting up a Python
+module search path and 'import'ing a few support modules (see module
+descriptions below):
+
+----
+ import os
+ import sys
+
+ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+             '/scripts/python/perf-script-Util/lib/Perf/Trace')
+
+ from perf_trace_context import *
+ from Core import *
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+def trace_begin():
+    pass
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+def trace_end():
+    pass
+----
+
+*trace_unhandled*, if defined, is called for any event that doesn't
+ have a handler explicitly defined for it.  The standard set of
+ common arguments is passed into it:
+
+----
+def trace_unhandled(event_name, context, common_cpu, common_secs,
+        common_nsecs, common_pid, common_comm):
+    pass
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Python modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various perf script Python modules.  To use the functions and
+variables from a given module, add the corresponding 'from XXXX
+import' line to your script.
+
+Core.py Module
+~~~~~~~~~~~~~~
+
+This module provides some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name
+  symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name
+
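+A hypothetical use, inside a handler for an event that has a flag
+field named 'flags', might look like:
+
+----
+ # event_name is the handler's first argument; flags is the raw field
+ # value passed in for this event
+ print "flags: %s\n" % flag_str(event_name, "flags", flags),
+----
+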
+The *autodict* function returns a special kind of Python
+dictionary that implements Perl's 'autovivifying' hashes in Python
+i.e. with autovivifying hashes, you can assign nested hash values
+without having to go to the trouble of creating intermediate levels if
+they don't exist.
+
+  autodict() - returns an autovivifying dictionary instance
+
+
+perf_trace_context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+perf_trace_context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a context variable, which is the same as the
+context variable passed into every event handler as the second
+argument.
+
+ common_pc(context) - returns common_preempt_count for the current event
+ common_flags(context) - returns common_flags for the current event
+ common_lock_depth(context) - returns common_lock_depth for the current event
+
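+For example, inside any event handler (a sketch; 'context' is the
+handler's second argument):
+
+----
+ pc = common_pc(context)
+ flags = common_flags(context)
+ depth = common_lock_depth(context)
+----
+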
+Util.py Module
+~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+  nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs(nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
+  nsecs_str(nsecs) - returns printable string in the form secs.nsecs
+  avg(total, n) - returns average given a sum and a total number of values
+
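+As a small sketch, a handler could turn the secs/nsecs pair it
+receives into a single printable timestamp:
+
+----
+ ts = nsecs(common_secs, common_nsecs)
+ print "timestamp: %s\n" % nsecs_str(ts),
+----
+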
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
new file mode 100644 (file)
index 0000000..29ad942
--- /dev/null
@@ -0,0 +1,118 @@
+perf-script(1)
+==============
+
+NAME
+----
+perf-script - Read perf.data (created by perf record) and display trace output
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays the trace recorded.
+
+There are several variants of perf script:
+
+  'perf script' to see a detailed trace of the workload that was
+  recorded.
+
+  You can also run a set of pre-canned scripts that aggregate and
+  summarize the raw trace data in various ways (the list of scripts is
+  available via 'perf script -l').  The following variants allow you to
+  record and run those scripts:
+
+  'perf script record <script> <command>' to record the events required
+  for 'perf script report'.  <script> is the name displayed in the
+  output of 'perf script --list' i.e. the actual script name minus any
+  language extension.  If <command> is not specified, the events are
+  recorded using the -a (system-wide) 'perf record' option.
+
+  'perf script report <script> [args]' to run and display the results
+  of <script>.  <script> is the name displayed in the output of 'perf
+  script --list' i.e. the actual script name minus any language
+  extension.  The perf.data output from a previous run of 'perf script
+  record <script>' is used and should be present for this command to
+  succeed.  [args] refers to the (mainly optional) args expected by
+  the script.
+
+  'perf script <script> <required-script-args> <command>' to both
+  record the events required for <script> and to run the <script>
+  using 'live-mode' i.e. without writing anything to disk.  <script>
+  is the name displayed in the output of 'perf script --list' i.e. the
+  actual script name minus any language extension.  If <command> is
+  not specified, the events are recorded using the -a (system-wide)
+  'perf record' option.  If <script> has any required args, they
+  should be specified before <command>.  This mode doesn't allow for
+  optional script args to be specified; if optional script args are
+  desired, they can be specified using separate 'perf script record'
+  and 'perf script report' commands, with the stdout of the record step
+  piped to the stdin of the report script, using the '-o -' and '-i -'
+  options of the corresponding commands (see the example after this
+  list).
+
+  'perf script <top-script>' to both record the events required for
+  <top-script> and to run the <top-script> using 'live-mode'
+  i.e. without writing anything to disk.  <top-script> is the name
+  displayed in the output of 'perf script --list' i.e. the actual
+  script name minus any language extension; a <top-script> is defined
+  as any script name ending with the string 'top'.
+
+  [<record-options>] can be passed to the record steps of 'perf script
+  record' and 'live-mode' variants; this isn't possible however for
+  <top-script> 'live-mode' or 'perf script report' variants.
+
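+  As an example of that last piped arrangement (a sketch following
+  the synopsis above; the script name is just an example):
+
+    # perf script record syscall-counts -o - | \
+        perf script -i - report syscall-counts
+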
+  See the 'SEE ALSO' section for links to language-specific
+  information on how to write and run your own trace scripts.
+
+OPTIONS
+-------
+<command>...::
+       Any command you can specify in a shell.
+
+-D::
+--dump-raw-script=::
+        Display verbose dump of the trace data.
+
+-L::
+--Latency=::
+        Show latency attributes (irqs/preemption disabled, etc).
+
+-l::
+--list=::
+        Display a list of available trace scripts.
+
+-s ['lang']::
+--script=::
+        Process trace data with the given script ([lang]:script[.ext]).
+       If the string 'lang' is specified in place of a script name, a
+        list of supported languages will be displayed instead.
+
+-g::
+--gen-script=::
+        Generate perf-script.[ext] starter script for given language,
+        using current perf.data.
+
+-a::
+        Force system-wide collection.  Scripts run without a <command>
+        normally use -a by default, while scripts run with a <command>
+        normally don't - this option allows the latter to be run in
+        system-wide mode.
+
+-i::
+--input=::
+        Input file name.
+
+-d::
+--debug-mode::
+        Do various checks like samples ordering and lost events.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
index 4b3a2d46b4378607f5195d12328646f5b1d7a638..b6da7affbbeeb82533387e9ba3f7c788d3e28dec 100644 (file)
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -35,24 +35,54 @@ OPTIONS
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing pid
+        stat events on existing process id
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id
+
 
 -a::
-        system-wide collection
+--all-cpus::
+        system-wide collection from all CPUs
 
 -c::
-        scale counter values
+--scale::
+       scale/normalize counter values
+
+-r::
+--repeat=<n>::
+       repeat command and print average + stddev (max: 100)
 
 -B::
+--big-num::
         print large numbers with thousands' separators according to locale
 
 -C::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
 EXAMPLES
 --------
 
index 1c4b5f5b7f71ec7047be7f02a369e8e27fc8710d..2c3b462f64b00531b4c8a7e5fc5cb6224dfc88d6 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
 OPTIONS
index 4b1788355ecac3d305bf72e6f58d5a477e08ba7b..d7b79e2ba2adbe2cc0cb6468a9d84d6b73b8ed0f 100644 (file)
@@ -38,6 +38,8 @@ OPTIONS
 --process::
         Select the processes to display, by name or PID
 
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
 
 SEE ALSO
 --------
index 1f9687663f2a9cd62d6cff9f597395523b3da931..f6eb1cdafb7758162463b0ca8a25525f94c44d19 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
 
 -C <cpu-list>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 
 -d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
        Only display functions with more events than this.
 
+-g::
+--group::
+        Put the counters into a counter group.
+
 -F <freq>::
 --freq=<freq>::
        Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
 
 -p <pid>::
 --pid=<pid>::
-       Profile events on existing pid.
+       Profile events on existing process ID.
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID.
 
 -r <priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
 --sym-annotate=<symbol>::
         Annotate this symbol.
 
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
 -v::
 --verbose::
        Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
deleted file mode 100644 (file)
index ee6525e..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-perf-trace-perl(1)
-==================
-
-NAME
-----
-perf-trace-perl - Process trace data with a Perl script
-
-SYNOPSIS
---------
-[verse]
-'perf trace' [-s [Perl]:script[.pl] ]
-
-DESCRIPTION
------------
-
-This perf trace option is used to process perf trace data using perf's
-built-in Perl interpreter.  It reads and processes the input file and
-displays the results of the trace analysis implemented in the given
-Perl script, if any.
-
-STARTER SCRIPTS
----------------
-
-You can avoid reading the rest of this document by running 'perf trace
--g perl' in the same directory as an existing perf.data trace file.
-That will generate a starter script containing a handler for each of
-the event types in the trace file; it simply prints every available
-field for each event in the trace file.
-
-You can also look at the existing scripts in
-~/libexec/perf-core/scripts/perl for typical examples showing how to
-do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.pl script, while not interesting for its results,
-attempts to exercise all of the main scripting features.
-
-EVENT HANDLERS
---------------
-
-When perf trace is invoked using a trace script, a user-defined
-'handler function' is called for each event in the trace.  If there's
-no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
-next event is processed.
-
-Most of the event's field values are passed as arguments to the
-handler function; some of the less common ones aren't - those are
-available as calls back into the perf executable (see below).
-
-As an example, the following perf record command can be used to record
-all sched_wakeup events in the system:
-
- # perf record -a -e sched:sched_wakeup
-
-Traces meant to be processed using a script should be recorded with
-the above option: -a to enable system-wide collection.
-
-The format file for the sched_wakep event defines the following fields
-(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
-
-----
- format:
-        field:unsigned short common_type;
-        field:unsigned char common_flags;
-        field:unsigned char common_preempt_count;
-        field:int common_pid;
-        field:int common_lock_depth;
-
-        field:char comm[TASK_COMM_LEN];
-        field:pid_t pid;
-        field:int prio;
-        field:int success;
-        field:int target_cpu;
-----
-
-The handler function for this event would be defined as:
-
-----
-sub sched::sched_wakeup
-{
-   my ($event_name, $context, $common_cpu, $common_secs,
-       $common_nsecs, $common_pid, $common_comm,
-       $comm, $pid, $prio, $success, $target_cpu) = @_;
-}
-----
-
-The handler function takes the form subsystem::event_name.
-
-The $common_* arguments in the handler's argument list are the set of
-arguments passed to all event handlers; some of the fields correspond
-to the common_* fields in the format file, but some are synthesized,
-and some of the common_* fields aren't common enough to to be passed
-to every event as arguments but are available as library functions.
-
-Here's a brief description of each of the invariant event args:
-
- $event_name               the name of the event as text
- $context                  an opaque 'cookie' used in calls back into perf
- $common_cpu               the cpu the event occurred on
- $common_secs              the secs portion of the event timestamp
- $common_nsecs             the nsecs portion of the event timestamp
- $common_pid               the pid of the current task
- $common_comm              the name of the current process
-
-All of the remaining fields in the event's format file have
-counterparts as handler function arguments of the same name, as can be
-seen in the example above.
-
-The above provides the basics needed to directly access every field of
-every event in a trace, which covers 90% of what you need to know to
-write a useful trace script.  The sections below cover the rest.
-
-SCRIPT LAYOUT
--------------
-
-Every perf trace Perl script should start by setting up a Perl module
-search path and 'use'ing a few support modules (see module
-descriptions below):
-
-----
- use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
- use lib "./Perf-Trace-Util/lib";
- use Perf::Trace::Core;
- use Perf::Trace::Context;
- use Perf::Trace::Util;
-----
-
-The rest of the script can contain handler functions and support
-functions in any order.
-
-Aside from the event handler functions discussed above, every script
-can implement a set of optional functions:
-
-*trace_begin*, if defined, is called before any event is processed and
-gives scripts a chance to do setup tasks:
-
-----
- sub trace_begin
- {
- }
-----
-
-*trace_end*, if defined, is called after all events have been
- processed and gives scripts a chance to do end-of-script tasks, such
- as display results:
-
-----
-sub trace_end
-{
-}
-----
-
-*trace_unhandled*, if defined, is called after for any event that
- doesn't have a handler explicitly defined for it.  The standard set
- of common arguments are passed into it:
-
-----
-sub trace_unhandled
-{
-    my ($event_name, $context, $common_cpu, $common_secs,
-        $common_nsecs, $common_pid, $common_comm) = @_;
-}
-----
-
-The remaining sections provide descriptions of each of the available
-built-in perf trace Perl modules and their associated functions.
-
-AVAILABLE MODULES AND FUNCTIONS
--------------------------------
-
-The following sections describe the functions and variables available
-via the various Perf::Trace::* Perl modules.  To use the functions and
-variables from the given module, add the corresponding 'use
-Perf::Trace::XXX' line to your perf trace script.
-
-Perf::Trace::Core Module
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-These functions provide some essential functions to user scripts.
-
-The *flag_str* and *symbol_str* functions provide human-readable
-strings for flag and symbolic fields.  These correspond to the strings
-and values parsed from the 'print fmt' fields of the event format
-files:
-
-  flag_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the flag field $field_name of event $event_name
-  symbol_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the symbolic field $field_name of event $event_name
-
-Perf::Trace::Context Module
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Some of the 'common' fields in the event format file aren't all that
-common, but need to be made accessible to user scripts nonetheless.
-
-Perf::Trace::Context defines a set of functions that can be used to
-access this data in the context of the current event.  Each of these
-functions expects a $context variable, which is the same as the
-$context variable passed into every event handler as the second
-argument.
-
- common_pc($context) - returns common_preempt count for the current event
- common_flags($context) - returns common_flags for the current event
- common_lock_depth($context) - returns common_lock_depth for the current event
-
-Perf::Trace::Util Module
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-Various utility functions for use with perf trace:
-
-  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
-  nsecs_secs($nsecs) - returns whole secs portion given nsecs
-  nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
-  nsecs_str($nsecs) - returns printable string in the form secs.nsecs
-  avg($total, $n) - returns average given a sum and a total number of values
-
-SEE ALSO
---------
-linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt
deleted file mode 100644 (file)
index 693be80..0000000
+++ /dev/null
@@ -1,623 +0,0 @@
-perf-trace-python(1)
-====================
-
-NAME
-----
-perf-trace-python - Process trace data with a Python script
-
-SYNOPSIS
---------
-[verse]
-'perf trace' [-s [Python]:script[.py] ]
-
-DESCRIPTION
------------
-
-This perf trace option is used to process perf trace data using perf's
-built-in Python interpreter.  It reads and processes the input file and
-displays the results of the trace analysis implemented in the given
-Python script, if any.
-
-A QUICK EXAMPLE
----------------
-
-This section shows the process, start to finish, of creating a working
-Python script that aggregates and extracts useful information from a
-raw perf trace stream.  You can avoid reading the rest of this
-document if an example is enough for you; the rest of the document
-provides more details on each step and lists the library functions
-available to script writers.
-
-This example actually details the steps that were used to create the
-'syscall-counts' script you see when you list the available perf trace
-scripts via 'perf trace -l'.  As such, this script also shows how to
-integrate your script into the list of general-purpose 'perf trace'
-scripts listed by that command.
-
-The syscall-counts script is a simple script, but demonstrates all the
-basic ideas necessary to create a useful script.  Here's an example
-of its output (syscall names are not yet supported, they will appear
-as numbers):
-
-----
-syscall events:
-
-event                                          count
-----------------------------------------  -----------
-sys_write                                     455067
-sys_getdents                                    4072
-sys_close                                       3037
-sys_swapoff                                     1769
-sys_read                                         923
-sys_sched_setparam                               826
-sys_open                                         331
-sys_newfstat                                     326
-sys_mmap                                         217
-sys_munmap                                       216
-sys_futex                                        141
-sys_select                                       102
-sys_poll                                          84
-sys_setitimer                                     12
-sys_writev                                         8
-15                                                 8
-sys_lseek                                          7
-sys_rt_sigprocmask                                 6
-sys_wait4                                          3
-sys_ioctl                                          3
-sys_set_robust_list                                1
-sys_exit                                           1
-56                                                 1
-sys_access                                         1
-----
-
-Basically our task is to keep a per-syscall tally that gets updated
-every time a system call occurs in the system.  Our script will do
-that, but first we need to record the data that will be processed by
-that script.  Theoretically, there are a couple of ways we could do
-that:
-
-- we could enable every event under the tracing/events/syscalls
-  directory, but this is over 600 syscalls, well beyond the number
-  allowable by perf.  These individual syscall events will however be
-  useful if we want to later use the guidance we get from the
-  general-purpose scripts to drill down and get more detail about
-  individual syscalls of interest.
-
-- we can enable the sys_enter and/or sys_exit syscalls found under
-  tracing/events/raw_syscalls.  These are called for all syscalls; the
-  'id' field can be used to distinguish between individual syscall
-  numbers.
-
-For this script, we only need to know that a syscall was entered; we
-don't care how it exited, so we'll use 'perf record' to record only
-the sys_enter events:
-
-----
-# perf record -a -e raw_syscalls:sys_enter
-
-^C[ perf record: Woken up 1 times to write data ]
-[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
-----
-
-The options basically say to collect data for every syscall event
-system-wide and multiplex the per-cpu output into a single stream.
-That single stream will be recorded in a file in the current directory
-called perf.data.
-
-Once we have a perf.data file containing our data, we can use the -g
-'perf trace' option to generate a Python script that will contain a
-callback handler for each event type found in the perf.data trace
-stream (for more details, see the STARTER SCRIPTS section).
-
-----
-# perf trace -g python
-generated Python script: perf-trace.py
-
-The output file created also in the current directory is named
-perf-trace.py.  Here's the file in its entirety:
-
-# perf trace event handlers, generated by perf trace -g python
-# Licensed under the terms of the GNU GPL License version 2
-
-# The common_* event handler fields are the most useful fields common to
-# all events.  They don't necessarily correspond to the 'common_*' fields
-# in the format files.  Those fields not available as handler params can
-# be retrieved using Python functions of the form common_*(context).
-# See the perf-trace-python Documentation for the list of available functions.
-
-import os
-import sys
-
-sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-
-from perf_trace_context import *
-from Core import *
-
-def trace_begin():
-       print "in trace_begin"
-
-def trace_end():
-       print "in trace_end"
-
-def raw_syscalls__sys_enter(event_name, context, common_cpu,
-       common_secs, common_nsecs, common_pid, common_comm,
-       id, args):
-               print_header(event_name, common_cpu, common_secs, common_nsecs,
-                       common_pid, common_comm)
-
-               print "id=%d, args=%s\n" % \
-               (id, args),
-
-def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
-               common_pid, common_comm):
-               print_header(event_name, common_cpu, common_secs, common_nsecs,
-               common_pid, common_comm)
-
-def print_header(event_name, cpu, secs, nsecs, pid, comm):
-       print "%-20s %5u %05u.%09u %8u %-20s " % \
-       (event_name, cpu, secs, nsecs, pid, comm),
-----
-
-At the top is a comment block followed by some import statements and a
-path append which every perf trace script should include.
-
-Following that are a couple generated functions, trace_begin() and
-trace_end(), which are called at the beginning and the end of the
-script respectively (for more details, see the SCRIPT_LAYOUT section
-below).
-
-Following those are the 'event handler' functions generated one for
-every event in the 'perf record' output.  The handler functions take
-the form subsystem__event_name, and contain named parameters, one for
-each field in the event; in this case, there's only one event,
-raw_syscalls__sys_enter().  (see the EVENT HANDLERS section below for
-more info on event handlers).
-
-The final couple of functions are, like the begin and end functions,
-generated for every script.  The first, trace_unhandled(), is called
-every time the script finds an event in the perf.data file that
-doesn't correspond to any event handler in the script.  This could
-mean either that the record step recorded event types that it wasn't
-really interested in, or the script was run against a trace file that
-doesn't correspond to the script.
-
-The script generated by -g option simply prints a line for each
-event found in the trace stream i.e. it basically just dumps the event
-and its parameter values to stdout.  The print_header() function is
-simply a utility function used for that purpose.  Let's rename the
-script and run it to see the default output:
-
-----
-# mv perf-trace.py syscall-counts.py
-# perf trace -s syscall-counts.py
-
-raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
-raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
-raw_syscalls__sys_enter     1 00840.847620860     7506 perf                  id=1, args=
-raw_syscalls__sys_enter     1 00840.847710478     6533 npviewer.bin          id=78, args=
-raw_syscalls__sys_enter     1 00840.847719204     6533 npviewer.bin          id=142, args=
-raw_syscalls__sys_enter     1 00840.847755445     6533 npviewer.bin          id=3, args=
-raw_syscalls__sys_enter     1 00840.847775601     6533 npviewer.bin          id=3, args=
-raw_syscalls__sys_enter     1 00840.847781820     6533 npviewer.bin          id=3, args=
-.
-.
-.
-----
-
-Of course, for this script, we're not interested in printing every
-trace event, but rather aggregating it in a useful way.  So we'll get
-rid of everything to do with printing as well as the trace_begin() and
-trace_unhandled() functions, which we won't be using.  That leaves us
-with this minimalistic skeleton:
-
-----
-import os
-import sys
-
-sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-
-from perf_trace_context import *
-from Core import *
-
-def trace_end():
-       print "in trace_end"
-
-def raw_syscalls__sys_enter(event_name, context, common_cpu,
-       common_secs, common_nsecs, common_pid, common_comm,
-       id, args):
-----
-
-In trace_end(), we'll simply print the results, but first we need to
-generate some results to print.  To do that we need to have our
-sys_enter() handler do the necessary tallying until all events have
-been counted.  A hash table indexed by syscall id is a good way to
-store that information; every time the sys_enter() handler is called,
-we simply increment a count associated with that hash entry indexed by
-that syscall id:
-
-----
-  syscalls = autodict()
-
-  try:
-    syscalls[id] += 1
-  except TypeError:
-    syscalls[id] = 1
-----
-
-The syscalls 'autodict' object is a special kind of Python dictionary
-(implemented in Core.py) that implements Perl's 'autovivifying' hashes
-in Python i.e. with autovivifying hashes, you can assign nested hash
-values without having to go to the trouble of creating intermediate
-levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create
-the intermediate hash levels and finally assign the value 1 to the
-hash entry for 'id' (because the value being assigned isn't a hash
-object itself, the initial value is assigned in the TypeError
-exception.  Well, there may be a better way to do this in Python but
-that's what works for now).
-
-Putting that code into the raw_syscalls__sys_enter() handler, we
-effectively end up with a single-level dictionary keyed on syscall id
-and having the counts we've tallied as values.
-
-The print_syscall_totals() function iterates over the entries in the
-dictionary and displays a line for each entry containing the syscall
-name (the dictonary keys contain the syscall ids, which are passed to
-the Util function syscall_name(), which translates the raw syscall
-numbers to the corresponding syscall name strings).  The output is
-displayed after all the events in the trace have been processed, by
-calling the print_syscall_totals() function from the trace_end()
-handler called at the end of script processing.
-
-The final script producing the output shown above is shown in its
-entirety below (syscall_name() helper is not yet available, you can
-only deal with id's for now):
-
-----
-import os
-import sys
-
-sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-
-from perf_trace_context import *
-from Core import *
-from Util import *
-
-syscalls = autodict()
-
-def trace_end():
-       print_syscall_totals()
-
-def raw_syscalls__sys_enter(event_name, context, common_cpu,
-       common_secs, common_nsecs, common_pid, common_comm,
-       id, args):
-       try:
-               syscalls[id] += 1
-       except TypeError:
-               syscalls[id] = 1
-
-def print_syscall_totals():
-    if for_comm is not None:
-           print "\nsyscall events for %s:\n\n" % (for_comm),
-    else:
-           print "\nsyscall events:\n\n",
-
-    print "%-40s  %10s\n" % ("event", "count"),
-    print "%-40s  %10s\n" % ("----------------------------------------", \
-                                 "-----------"),
-
-    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
-                                 reverse = True):
-           print "%-40s  %10d\n" % (syscall_name(id), val),
-----
-
-The script can be run just as before:
-
-  # perf trace -s syscall-counts.py
-
-So those are the essential steps in writing and running a script.  The
-process can be generalized to any tracepoint or set of tracepoints
-you're interested in - basically find the tracepoint(s) you're
-interested in by looking at the list of available events shown by
-'perf list' and/or look in /sys/kernel/debug/tracing events for
-detailed event and field info, record the corresponding trace data
-using 'perf record', passing it the list of interesting events,
-generate a skeleton script using 'perf trace -g python' and modify the
-code to aggregate and display it for your particular needs.
-
-After you've done that you may end up with a general-purpose script
-that you want to keep around and have available for future use.  By
-writing a couple of very simple shell scripts and putting them in the
-right place, you can have your script listed alongside the other
-scripts listed by the 'perf trace -l' command e.g.:
-
-----
-root@tropicana:~# perf trace -l
-List of available trace scripts:
-  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
-  wakeup-latency                       system-wide min/max/avg wakeup latency
-  rw-by-file <comm>                    r/w activity for a program, by file
-  rw-by-pid                            system-wide r/w activity
-----
-
-A nice side effect of doing this is that you also then capture the
-probably lengthy 'perf record' command needed to record the events for
-the script.
-
-To have the script appear as a 'built-in' script, you write two simple
-scripts, one for recording and one for 'reporting'.
-
-The 'record' script is a shell script with the same base name as your
-script, but with -record appended.  The shell script should be put
-into the perf/scripts/python/bin directory in the kernel source tree.
-In that script, you write the 'perf record' command-line needed for
-your script:
-
-----
-# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
-
-#!/bin/bash
-perf record -a -e raw_syscalls:sys_enter
-----
-
-The 'report' script is also a shell script with the same base name as
-your script, but with -report appended.  It should also be located in
-the perf/scripts/python/bin directory.  In that script, you write the
-'perf trace -s' command-line needed for running your script:
-
-----
-# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
-
-#!/bin/bash
-# description: system-wide syscall counts
-perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
-----
-
-Note that the location of the Python script given in the shell script
-is in the libexec/perf-core/scripts/python directory - this is where
-the script will be copied by 'make install' when you install perf.
-For the installation to install your script there, your script needs
-to be located in the perf/scripts/python directory in the kernel
-source tree:
-
-----
-# ls -al kernel-source/tools/perf/scripts/python
-
-root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
-total 32
-drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
-drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
-drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
--rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
-drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
--rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
-----
-
-Once you've done that (don't forget to do a new 'make install',
-otherwise your script won't show up at run-time), 'perf trace -l'
-should show a new entry for your script:
-
-----
-root@tropicana:~# perf trace -l
-List of available trace scripts:
-  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
-  wakeup-latency                       system-wide min/max/avg wakeup latency
-  rw-by-file <comm>                    r/w activity for a program, by file
-  rw-by-pid                            system-wide r/w activity
-  syscall-counts                       system-wide syscall counts
-----
-
-You can now perform the record step via 'perf trace record':
-
-  # perf trace record syscall-counts
-
-and display the output using 'perf trace report':
-
-  # perf trace report syscall-counts
-
-STARTER SCRIPTS
----------------
-
-You can quickly get started writing a script for a particular set of
-trace data by generating a skeleton script using 'perf trace -g
-python' in the same directory as an existing perf.data trace file.
-That will generate a starter script containing a handler for each of
-the event types in the trace file; it simply prints every available
-field for each event in the trace file.
-
-You can also look at the existing scripts in
-~/libexec/perf-core/scripts/python for typical examples showing how to
-do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.py script, while not interesting for its results,
-attempts to exercise all of the main scripting features.
-
-EVENT HANDLERS
---------------
-
-When perf trace is invoked using a trace script, a user-defined
-'handler function' is called for each event in the trace.  If there's
-no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
-next event is processed.
-
-Most of the event's field values are passed as arguments to the
-handler function; some of the less common ones aren't - those are
-available as calls back into the perf executable (see below).
-
-As an example, the following perf record command can be used to record
-all sched_wakeup events in the system:
-
- # perf record -a -e sched:sched_wakeup
-
-Traces meant to be processed using a script should be recorded with
-the above option: -a to enable system-wide collection.
-
-The format file for the sched_wakep event defines the following fields
-(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
-
-----
- format:
-        field:unsigned short common_type;
-        field:unsigned char common_flags;
-        field:unsigned char common_preempt_count;
-        field:int common_pid;
-        field:int common_lock_depth;
-
-        field:char comm[TASK_COMM_LEN];
-        field:pid_t pid;
-        field:int prio;
-        field:int success;
-        field:int target_cpu;
-----
-
-The handler function for this event would be defined as:
-
-----
-def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
-       common_nsecs, common_pid, common_comm,
-       comm, pid, prio, success, target_cpu):
-       pass
-----
-
-The handler function takes the form subsystem__event_name.
-
-The common_* arguments in the handler's argument list are the set of
-arguments passed to all event handlers; some of the fields correspond
-to the common_* fields in the format file, but some are synthesized,
-and some of the common_* fields aren't common enough to to be passed
-to every event as arguments but are available as library functions.
-
-Here's a brief description of each of the invariant event args:
-
- event_name                the name of the event as text
- context                   an opaque 'cookie' used in calls back into perf
- common_cpu                the cpu the event occurred on
- common_secs               the secs portion of the event timestamp
- common_nsecs              the nsecs portion of the event timestamp
- common_pid                the pid of the current task
- common_comm               the name of the current process
-
-All of the remaining fields in the event's format file have
-counterparts as handler function arguments of the same name, as can be
-seen in the example above.
-
-The above provides the basics needed to directly access every field of
-every event in a trace, which covers 90% of what you need to know to
-write a useful trace script.  The sections below cover the rest.
-
-SCRIPT LAYOUT
--------------
-
-Every perf trace Python script should start by setting up a Python
-module search path and 'import'ing a few support modules (see module
-descriptions below):
-
-----
- import os
- import sys
-
- sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-             '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-
- from perf_trace_context import *
- from Core import *
-----
-
-The rest of the script can contain handler functions and support
-functions in any order.
-
-Aside from the event handler functions discussed above, every script
-can implement a set of optional functions:
-
-*trace_begin*, if defined, is called before any event is processed and
-gives scripts a chance to do setup tasks:
-
-----
-def trace_begin:
-    pass
-----
-
-*trace_end*, if defined, is called after all events have been
- processed and gives scripts a chance to do end-of-script tasks, such
- as display results:
-
-----
-def trace_end:
-    pass
-----
-
-*trace_unhandled*, if defined, is called after for any event that
- doesn't have a handler explicitly defined for it.  The standard set
- of common arguments are passed into it:
-
-----
-def trace_unhandled(event_name, context, common_cpu, common_secs,
-        common_nsecs, common_pid, common_comm):
-    pass
-----
-
-The remaining sections provide descriptions of each of the available
-built-in perf trace Python modules and their associated functions.
-
-AVAILABLE MODULES AND FUNCTIONS
--------------------------------
-
-The following sections describe the functions and variables available
-via the various perf trace Python modules.  To use the functions and
-variables from the given module, add the corresponding 'from XXXX
-import' line to your perf trace script.
-
-Core.py Module
-~~~~~~~~~~~~~~
-
-These functions provide some essential functions to user scripts.
-
-The *flag_str* and *symbol_str* functions provide human-readable
-strings for flag and symbolic fields.  These correspond to the strings
-and values parsed from the 'print fmt' fields of the event format
-files:
-
-  flag_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the flag field field_name of event event_name
-  symbol_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the symbolic field field_name of event event_name
-
-The *autodict* function returns a special kind of Python
-dictionary that implements Perl's 'autovivifying' hashes in Python
-i.e. with autovivifying hashes, you can assign nested hash values
-without having to go to the trouble of creating intermediate levels if
-they don't exist.
-
-  autodict() - returns an autovivifying dictionary instance
-
-
-perf_trace_context Module
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Some of the 'common' fields in the event format file aren't all that
-common, but need to be made accessible to user scripts nonetheless.
-
-perf_trace_context defines a set of functions that can be used to
-access this data in the context of the current event.  Each of these
-functions expects a context variable, which is the same as the
-context variable passed into every event handler as the second
-argument.
-
- common_pc(context) - returns common_preempt count for the current event
- common_flags(context) - returns common_flags for the current event
- common_lock_depth(context) - returns common_lock_depth for the current event
-
-Util.py Module
-~~~~~~~~~~~~~~
-
-Various utility functions for use with perf trace:
-
-  nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
-  nsecs_secs(nsecs) - returns whole secs portion given nsecs
-  nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
-  nsecs_str(nsecs) - returns printable string in the form secs.nsecs
-  avg(total, n) - returns average given a sum and a total number of values
-
-SEE ALSO
---------
-linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
deleted file mode 100644 (file)
index 26aff6b..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-perf-trace(1)
-=============
-
-NAME
-----
-perf-trace - Read perf.data (created by perf record) and display trace output
-
-SYNOPSIS
---------
-[verse]
-'perf trace' [<options>]
-'perf trace' [<options>] record <script> [<record-options>] <command>
-'perf trace' [<options>] report <script> [script-args]
-'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
-'perf trace' [<options>] <top-script> [script-args]
-
-DESCRIPTION
------------
-This command reads the input file and displays the trace recorded.
-
-There are several variants of perf trace:
-
-  'perf trace' to see a detailed trace of the workload that was
-  recorded.
-
-  You can also run a set of pre-canned scripts that aggregate and
-  summarize the raw trace data in various ways (the list of scripts is
-  available via 'perf trace -l').  The following variants allow you to
-  record and run those scripts:
-
-  'perf trace record <script> <command>' to record the events required
-  for 'perf trace report'.  <script> is the name displayed in the
-  output of 'perf trace --list' i.e. the actual script name minus any
-  language extension.  If <command> is not specified, the events are
-  recorded using the -a (system-wide) 'perf record' option.
-
-  'perf trace report <script> [args]' to run and display the results
-  of <script>.  <script> is the name displayed in the output of 'perf
-  trace --list' i.e. the actual script name minus any language
-  extension.  The perf.data output from a previous run of 'perf trace
-  record <script>' is used and should be present for this command to
-  succeed.  [args] refers to the (mainly optional) args expected by
-  the script.
-
-  'perf trace <script> <required-script-args> <command>' to both
-  record the events required for <script> and to run the <script>
-  using 'live-mode' i.e. without writing anything to disk.  <script>
-  is the name displayed in the output of 'perf trace --list' i.e. the
-  actual script name minus any language extension.  If <command> is
-  not specified, the events are recorded using the -a (system-wide)
-  'perf record' option.  If <script> has any required args, they
-  should be specified before <command>.  This mode doesn't allow for
-  optional script args to be specified; if optional script args are
-  desired, they can be specified using separate 'perf trace record'
-  and 'perf trace report' commands, with the stdout of the record step
-  piped to the stdin of the report script, using the '-o -' and '-i -'
-  options of the corresponding commands.
-
-  'perf trace <top-script>' to both record the events required for
-  <top-script> and to run the <top-script> using 'live-mode'
-  i.e. without writing anything to disk.  <top-script> is the name
-  displayed in the output of 'perf trace --list' i.e. the actual
-  script name minus any language extension; a <top-script> is defined
-  as any script name ending with the string 'top'.
-
-  [<record-options>] can be passed to the record steps of 'perf trace
-  record' and 'live-mode' variants; this isn't possible however for
-  <top-script> 'live-mode' or 'perf trace report' variants.
-
-  See the 'SEE ALSO' section for links to language-specific
-  information on how to write and run your own trace scripts.
-
-OPTIONS
--------
-<command>...::
-       Any command you can specify in a shell.
-
--D::
---dump-raw-trace=::
-        Display verbose dump of the trace data.
-
--L::
---Latency=::
-        Show latency attributes (irqs/preemption disabled, etc).
-
--l::
---list=::
-        Display a list of available trace scripts.
-
--s ['lang']::
---script=::
-        Process trace data with the given script ([lang]:script[.ext]).
-       If the string 'lang' is specified in place of a script name, a
-        list of supported languages will be displayed instead.
-
--g::
---gen-script=::
-        Generate perf-trace.[ext] starter script for given language,
-        using current perf.data.
-
--a::
-        Force system-wide collection.  Scripts run without a <command>
-        normally use -a by default, while scripts run with a <command>
-        normally don't - this option allows the latter to be run in
-        system-wide mode.
-
-
-SEE ALSO
---------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1],
-linkperf:perf-trace-python[1]
index 8c7fc0c8f0b8cd0a77ddb58fb5b712379276ebc8..c12659d8cb26fcefd8449581921aef2962cb651c 100644 (file)
@@ -7,6 +7,7 @@ include/linux/stringify.h
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
index d1db0f676a4bf14850fa0264e78fe3d482d376dc..1b9b13ee2a726848bbfe2bdd9d81a9a98f8b5940 100644 (file)
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
+       RAW_ARCH := x86_64
         ARCH := x86
+       ARCH_CFLAGS := -DARCH_X86_64
+       ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
 LIB_H += util/include/linux/rbtree.h
 LIB_H += util/include/linux/string.h
 LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
 LIB_H += util/include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
 LIB_H += util/include/asm/system.h
 LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
@@ -390,6 +396,7 @@ LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
 LIB_H += util/event.h
+LIB_H += util/evsel.h
 LIB_H += util/exec_cmd.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
@@ -398,6 +405,7 @@ LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
 LIB_H += util/quote.h
 LIB_H += util/util.h
+LIB_H += util/xyarray.h
 LIB_H += util/header.h
 LIB_H += util/help.h
 LIB_H += util/session.h
@@ -417,6 +425,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -426,6 +435,7 @@ LIB_OBJS += $(OUTPUT)util/ctype.o
 LIB_OBJS += $(OUTPUT)util/debugfs.o
 LIB_OBJS += $(OUTPUT)util/environment.o
 LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
 LIB_OBJS += $(OUTPUT)util/help.o
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -463,6 +473,7 @@ LIB_OBJS += $(OUTPUT)util/sort.o
 LIB_OBJS += $(OUTPUT)util/hist.o
 LIB_OBJS += $(OUTPUT)util/probe-event.o
 LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
 LIB_OBJS += $(OUTPUT)util/cpumap.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
@@ -472,6 +483,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +499,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
 BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
 BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
 BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
 BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +521,7 @@ PERFLIBS = $(LIB_FILE)
 -include config.mak
 
 ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
        msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
        NO_DWARF := 1
@@ -554,7 +568,7 @@ ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
        msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-       BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+       BASIC_CFLAGS += -DDWARF_SUPPORT
        EXTLIBS += -lelf -ldw
        LIB_OBJS += $(OUTPUT)util/probe-finder.o
 endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +905,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 
-LIBS = $(PERFLIBS) $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
        $(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644 (file)
index 0000000..a72e36c
--- /dev/null
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc)              \
+       extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644 (file)
index 0000000..d588b87
--- /dev/null
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+       "x86-64-unrolled",
+       "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644 (file)
index 0000000..a57b66e
--- /dev/null
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
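
The three new files above use the "x-macro" idiom: mem-memcpy-x86-64-asm-def.h holds one MEMCPY_FN() entry per routine, and each including site defines MEMCPY_FN to expand that list into whatever it needs - extern declarations in mem-memcpy-arch.h, table entries in bench/mem-memcpy.c further down. A minimal standalone sketch of the same idiom (all names here are illustrative, not taken from the patch):

    #include <stdio.h>
    #include <string.h>

    /* single list of routines, like mem-memcpy-x86-64-asm-def.h */
    #define MEMCPY_LIST \
            MEMCPY_FN(memcpy_libc, "libc", "memcpy() provided by libc")

    /* first expansion: declarations, as mem-memcpy-arch.h does */
    #define MEMCPY_FN(fn, name, desc) \
            void *fn(void *dst, const void *src, size_t len);
    MEMCPY_LIST
    #undef MEMCPY_FN

    struct routine {
            const char *name;
            const char *desc;
            void *(*fn)(void *, const void *, size_t);
    };

    /* second expansion: table entries, as routines[] does below */
    static struct routine routines[] = {
    #define MEMCPY_FN(fn, name, desc) { name, desc, fn },
            MEMCPY_LIST
    #undef MEMCPY_FN
            { NULL, NULL, NULL }
    };

    void *memcpy_libc(void *dst, const void *src, size_t len)
    {
            return memcpy(dst, src, len);
    }

    int main(void)
    {
            int i;

            for (i = 0; routines[i].name; i++)
                    printf("%-8s %s\n", routines[i].name, routines[i].desc);
            return 0;
    }

Adding a routine then means one new MEMCPY_FN() line; the declarations and the table stay in sync automatically.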
index 38dae7465142fdb86037cbae95a85a3ede9e24ed..db82021f4b91c7172a2fe43b4902e67f022af7b9 100644 (file)
@@ -12,6 +12,7 @@
 #include "../util/parse-options.h"
 #include "../util/header.h"
 #include "bench.h"
+#include "mem-memcpy-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 
 static const char      *length_str     = "1MB";
 static const char      *routine        = "default";
-static bool            use_clock       = false;
+static bool            use_clock;
 static int             clock_fd;
+static bool            only_prefault;
+static bool            no_prefault;
 
 static const struct option options[] = {
        OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
                    "Specify routine to copy"),
        OPT_BOOLEAN('c', "clock", &use_clock,
                    "Use CPU clock for measuring"),
+       OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+                   "Show only the result with page faults before memcpy()"),
+       OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+                   "Show only the result without page faults before memcpy()"),
        OPT_END()
 };
 
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
 struct routine {
        const char *name;
        const char *desc;
-       void * (*fn)(void *dst, const void *src, size_t len);
+       memcpy_t fn;
 };
 
 struct routine routines[] = {
        { "default",
          "Default memcpy() provided by glibc",
          memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
        { NULL,
          NULL,
          NULL   }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
                (double)ts->tv_usec / (double)1000000;
 }
 
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+       *dst = zalloc(length);
+       if (!*dst)
+               die("memory allocation failed - maybe length is too large?\n");
+
+       *src = zalloc(length);
+       if (!*src)
+               die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+       u64 clock_start = 0ULL, clock_end = 0ULL;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&dst, &src, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       clock_start = get_clock();
+       fn(dst, src, len);
+       clock_end = get_clock();
+
+       free(src);
+       free(dst);
+       return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+       struct timeval tv_start, tv_end, tv_diff;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&dst, &src, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       BUG_ON(gettimeofday(&tv_start, NULL));
+       fn(dst, src, len);
+       BUG_ON(gettimeofday(&tv_end, NULL));
+
+       timersub(&tv_end, &tv_start, &tv_diff);
+
+       free(src);
+       free(dst);
+       return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {                                      \
+               if (x < K)                                      \
+                       printf(" %14lf B/Sec", x);              \
+               else if (x < K * K)                             \
+                       printf(" %14lf KB/Sec", x / K);         \
+               else if (x < K * K * K)                         \
+                       printf(" %14lf MB/Sec", x / K / K);     \
+               else                                            \
+                       printf(" %14lf GB/Sec", x / K / K / K); \
+       } while (0)
+
 int bench_mem_memcpy(int argc, const char **argv,
                     const char *prefix __used)
 {
        int i;
-       void *dst, *src;
-       size_t length;
-       double bps = 0.0;
-       struct timeval tv_start, tv_end, tv_diff;
-       u64 clock_start, clock_end, clock_diff;
+       size_t len;
+       double result_bps[2];
+       u64 result_clock[2];
 
-       clock_start = clock_end = clock_diff = 0ULL;
        argc = parse_options(argc, argv, options,
                             bench_mem_memcpy_usage, 0);
 
-       tv_diff.tv_sec = 0;
-       tv_diff.tv_usec = 0;
-       length = (size_t)perf_atoll((char *)length_str);
+       if (use_clock)
+               init_clock();
+
+       len = (size_t)perf_atoll((char *)length_str);
 
-       if ((s64)length <= 0) {
+       result_clock[0] = result_clock[1] = 0ULL;
+       result_bps[0] = result_bps[1] = 0.0;
+
+       if ((s64)len <= 0) {
                fprintf(stderr, "Invalid length:%s\n", length_str);
                return 1;
        }
 
+       /* specifying both is the same as specifying neither */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
        for (i = 0; routines[i].name; i++) {
                if (!strcmp(routines[i].name, routine))
                        break;
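
The pair of helpers added above time each copy twice: once on freshly allocated, never-touched buffers, so the copy pays for the demand-paging faults, and once after a warm-up pass that takes those faults before the timed region. A standalone sketch of the same measurement using gettimeofday() and timersub(), the same calls the patch uses (get_clock()/init_clock() are perf-internal, so the TSC path is omitted here):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/time.h>

    /* time one memcpy(); if prefault is set, fault the pages in first */
    static double copy_bytes_per_sec(size_t len, int prefault)
    {
            void *src = calloc(1, len), *dst = calloc(1, len);
            struct timeval start, end, diff;
            double secs;

            if (!src || !dst)
                    exit(1);
            if (prefault)
                    memcpy(dst, src, len);  /* warm-up: take the faults now */

            gettimeofday(&start, NULL);
            memcpy(dst, src, len);          /* timed region */
            gettimeofday(&end, NULL);
            timersub(&end, &start, &diff);

            secs = diff.tv_sec + diff.tv_usec / 1e6;
            free(src);
            free(dst);
            return (double)len / secs;
    }

    int main(void)
    {
            size_t len = 1 << 20;           /* 1MB, the bench default */

            printf("cold: %14.3f MB/sec\n", copy_bytes_per_sec(len, 0) / 1e6);
            printf("warm: %14.3f MB/sec\n", copy_bytes_per_sec(len, 1) / 1e6);
            return 0;
    }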
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
                return 1;
        }
 
-       dst = zalloc(length);
-       if (!dst)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       src = zalloc(length);
-       if (!src)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       if (bench_format == BENCH_FORMAT_DEFAULT) {
-               printf("# Copying %s Bytes from %p to %p ...\n\n",
-                      length_str, src, dst);
-       }
-
-       if (use_clock) {
-               init_clock();
-               clock_start = get_clock();
-       } else {
-               BUG_ON(gettimeofday(&tv_start, NULL));
-       }
-
-       routines[i].fn(dst, src, length);
+       if (bench_format == BENCH_FORMAT_DEFAULT)
+               printf("# Copying %s Bytes ...\n\n", length_str);
 
-       if (use_clock) {
-               clock_end = get_clock();
-               clock_diff = clock_end - clock_start;
+       if (!only_prefault && !no_prefault) {
+               /* show both of results */
+               if (use_clock) {
+                       result_clock[0] =
+                               do_memcpy_clock(routines[i].fn, len, false);
+                       result_clock[1] =
+                               do_memcpy_clock(routines[i].fn, len, true);
+               } else {
+                       result_bps[0] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, false);
+                       result_bps[1] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, true);
+               }
        } else {
-               BUG_ON(gettimeofday(&tv_end, NULL));
-               timersub(&tv_end, &tv_start, &tv_diff);
-               bps = (double)((double)length / timeval2double(&tv_diff));
+               if (use_clock) {
+                       result_clock[pf] =
+                               do_memcpy_clock(routines[i].fn,
+                                               len, only_prefault);
+               } else {
+                       result_bps[pf] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, only_prefault);
+               }
        }
 
        switch (bench_format) {
        case BENCH_FORMAT_DEFAULT:
-               if (use_clock) {
-                       printf(" %14lf Clock/Byte\n",
-                              (double)clock_diff / (double)length);
-               } else {
-                       if (bps < K)
-                               printf(" %14lf B/Sec\n", bps);
-                       else if (bps < K * K)
-                               printf(" %14lfd KB/Sec\n", bps / 1024);
-                       else if (bps < K * K * K)
-                               printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
-                       else {
-                               printf(" %14lf GB/Sec\n",
-                                      bps / 1024 / 1024 / 1024);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte\n",
+                                       (double)result_clock[0]
+                                       / (double)len);
+                               printf(" %14lf Clock/Byte (with prefault)\n",
+                                       (double)result_clock[1]
+                                       / (double)len);
+                       } else {
+                               print_bps(result_bps[0]);
+                               printf("\n");
+                               print_bps(result_bps[1]);
+                               printf(" (with prefault)\n");
                        }
+               } else {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte",
+                                       (double)result_clock[pf]
+                                       / (double)len);
+                       } else {
+                               print_bps(result_bps[pf]);
+                       }
+
+                       printf("%s\n", only_prefault ? " (with prefault)" : "");
                }
                break;
        case BENCH_FORMAT_SIMPLE:
-               if (use_clock) {
-                       printf("%14lf\n",
-                              (double)clock_diff / (double)length);
-               } else
-                       printf("%lf\n", bps);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf("%lf %lf\n",
+                                       (double)result_clock[0] / (double)len,
+                                       (double)result_clock[1] / (double)len);
+                       } else {
+                               printf("%lf %lf\n",
+                                       result_bps[0], result_bps[1]);
+                       }
+               } else {
+                       if (use_clock) {
+                               printf("%lf\n", (double)result_clock[pf]
+                                       / (double)len);
+                       } else {
+                               printf("%lf\n", result_bps[pf]);
+                       }
+               }
                break;
        default:
                /* reaching this means there's some disaster: */
index 6d5604d8df9599acb55d87017f5d58e19d906395..c056cdc0691258b159665ca3e8c74d2963543ccf 100644 (file)
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
        return hist_entry__inc_addr_samples(he, al->addr);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
        .mmap   = event__process_mmap,
        .comm   = event__process_comm,
        .fork   = event__process_task,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
@@ -382,7 +384,7 @@ static int __cmd_annotate(void)
        int ret;
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
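
builtin-annotate now declares .ordered_samples and .ordering_requires_timestamps in its perf_event_ops and hands the ops to perf_session__new() at open time, so the session layer can re-sort samples by timestamp (and degrade cleanly when timestamps were not recorded) before any per-tool callback runs. The shape of a tool after this change, sketched against the in-tree API as it appears in these hunks (not standalone - it needs the tools/perf headers, and the .sample member name follows its use elsewhere in this series):

    static int my_sample(event_t *event, struct sample_data *sample,
                         struct perf_session *session)
    {
            /* sample->ip, sample->time etc. already parsed by the core */
            return 0;
    }

    static struct perf_event_ops my_ops = {
            .sample                         = my_sample,
            .comm                           = event__process_comm,
            .ordered_samples                = true,
            .ordering_requires_timestamps   = true,
    };

    /* the ops now go in at open time, not at process-events time */
    struct perf_session *s = perf_session__new("perf.data", O_RDONLY,
                                               0, false, &my_ops);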
 
index 44a47e13bd673eb362f956b80bd02adbec367d45..5af32ae9031ec83fc2db926df9360d1ce8f1620c 100644 (file)
@@ -36,10 +36,10 @@ static const struct option options[] = {
 
 static int __cmd_buildid_list(void)
 {
-       int err = -1;
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false,
+                                   &build_id__mark_dso_hit_ops);
        if (session == NULL)
                return -1;
 
@@ -49,7 +49,7 @@ static int __cmd_buildid_list(void)
        perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
 
        perf_session__delete(session);
-       return err;
+       return 0;
 }
 
 int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
index fca1d4402910ab13a6f7aa45299e31c0289cb27c..3153e492dbcc29e1593b6df29357424dd012da99 100644 (file)
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
        return -ENOMEM;
 }
 
-static int diff__process_sample_event(event_t *event, struct perf_session *session)
+static int diff__process_sample_event(event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data = { .period = 1, };
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
        if (al.filtered || al.sym == NULL)
                return 0;
 
-       if (hists__add_entry(&session->hists, &al, data.period)) {
+       if (hists__add_entry(&session->hists, &al, sample->period)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       session->hists.stats.total_period += data.period;
+       session->hists.stats.total_period += sample->period;
        return 0;
 }
 
@@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = {
        .exit   = event__process_task,
        .fork   = event__process_task,
        .lost   = event__process_lost,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@ static int __cmd_diff(void)
        int ret, i;
        struct perf_session *session[2];
 
-       session[0] = perf_session__new(input_old, O_RDONLY, force, false);
-       session[1] = perf_session__new(input_new, O_RDONLY, force, false);
+       session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
+       session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
        if (session[0] == NULL || session[1] == NULL)
                return -ENOMEM;
 
@@ -173,7 +176,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
-       OPT_BOOLEAN('m', "displacement", &show_displacement,
+       OPT_BOOLEAN('M', "displacement", &show_displacement,
                    "Show position displacement relative to baseline"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@ static const struct option options[] = {
        OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                   "separator for columns, no spaces will be added between "
                   "columns '.' is reserved."),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
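
The builtin-diff hunks show the pattern this series applies to every tool: the per-tool sample callback no longer builds and parses its own sample_data; the session core parses each record once and passes the result in. Side by side (illustrative, the two variants are not meant to compile together):

    /* before: every tool duplicated this */
    static int process_sample_event(event_t *event, struct perf_session *session)
    {
            struct sample_data data = { .period = 1, };

            event__parse_sample(event, session->sample_type, &data);
            /* ... use data.period, data.raw_data ... */
            return 0;
    }

    /* after: parsed once in the core, handed down ready to use */
    static int process_sample_event(event_t *event, struct sample_data *sample,
                                    struct perf_session *session)
    {
            /* ... use sample->period, sample->raw_data ... */
            return 0;
    }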
 
index 8e3e47b064cea7ddea4e98f0fb98202b35643632..0c78ffa7bf675f46c9e631d9fa8d51fbc71aded4 100644 (file)
@@ -16,8 +16,8 @@
 static char            const *input_name = "-";
 static bool            inject_build_ids;
 
-static int event__repipe(event_t *event __used,
-                        struct perf_session *session __used)
+static int event__repipe_synth(event_t *event,
+                              struct perf_session *session __used)
 {
        uint32_t size;
        void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
        return 0;
 }
 
-static int event__repipe_mmap(event_t *self, struct perf_session *session)
+static int event__repipe(event_t *event, struct sample_data *sample __used,
+                        struct perf_session *session)
+{
+       return event__repipe_synth(event, session);
+}
+
+static int event__repipe_mmap(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_mmap(self, session);
-       event__repipe(self, session);
+       err = event__process_mmap(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
 
-static int event__repipe_task(event_t *self, struct perf_session *session)
+static int event__repipe_task(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_task(self, session);
-       event__repipe(self, session);
+       err = event__process_task(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
 {
        int err;
 
-       event__repipe(self, session);
+       event__repipe_synth(self, session);
        err = event__process_tracing_data(self, session);
 
        return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
        return 0;
 }
 
-static int event__inject_buildid(event_t *event, struct perf_session *session)
+static int event__inject_buildid(event_t *event, struct sample_data *sample,
+                                struct perf_session *session)
 {
        struct addr_location al;
        struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
        }
 
 repipe:
-       event__repipe(event, session);
+       event__repipe(event, sample, session);
        return 0;
 }
 
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
        .read           = event__repipe,
        .throttle       = event__repipe,
        .unthrottle     = event__repipe,
-       .attr           = event__repipe,
-       .event_type     = event__repipe,
-       .tracing_data   = event__repipe,
-       .build_id       = event__repipe,
+       .attr           = event__repipe_synth,
+       .event_type     = event__repipe_synth,
+       .tracing_data   = event__repipe_synth,
+       .build_id       = event__repipe_synth,
 };
 
 extern volatile int session_done;
@@ -187,7 +196,7 @@ static int __cmd_inject(void)
                inject_ops.tracing_data = event__repipe_tracing_data;
        }
 
-       session = perf_session__new(input_name, O_RDONLY, false, true);
+       session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
        if (session == NULL)
                return -ENOMEM;
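
perf inject works as a filter: it reads a perf.data stream on stdin and writes a possibly modified stream on stdout. event__repipe_synth() forwards the raw record using the size from the event header, and the per-type handlers process the record before repiping it. A standalone sketch of that process-then-forward loop, over a hypothetical 4-byte length-prefixed record format (perf's real header carries more than a size):

    #include <stdio.h>
    #include <unistd.h>

    /* forward size bytes verbatim, like event__repipe_synth() */
    static int repipe(const void *buf, size_t size)
    {
            const char *p = buf;

            while (size) {
                    ssize_t n = write(STDOUT_FILENO, p, size);

                    if (n < 0)
                            return -1;
                    p += n;
                    size -= (size_t)n;
            }
            return 0;
    }

    int main(void)
    {
            unsigned int size;
            char rec[65536];

            while (fread(&size, sizeof(size), 1, stdin) == 1 &&
                   size <= sizeof(rec)) {
                    if (fread(rec, 1, size, stdin) != size)
                            break;
                    /* inspect or rewrite rec here, then forward it */
                    if (repipe(&size, sizeof(size)) || repipe(rec, size))
                            return 1;
            }
            return 0;
    }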
 
index 31f60a2535e0ec95b60e6b90e3e24818fa0dd972..def7ddc2fd4fbc1b1c57795f4c10c519729b035d 100644 (file)
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
        }
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       process_raw_event(event, data.raw_data, data.cpu,
-                         data.time, thread);
+       process_raw_event(event, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
@@ -492,7 +481,8 @@ static void sort_result(void)
 static int __cmd_kmem(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
index 821c1586a22b7da92cd732ad1a4ee4e05b037085..b9c6e54329713e326d74da9b08164e562e521d03 100644 (file)
@@ -834,22 +834,18 @@ static void dump_info(void)
                die("Unknown type of information\n");
 }
 
-static int process_sample_event(event_t *self, struct perf_session *s)
+static int process_sample_event(event_t *self, struct sample_data *sample,
+                               struct perf_session *s)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(s, sample->tid);
 
-       bzero(&data, sizeof(data));
-       event__parse_sample(self, s->sample_type, &data);
-
-       thread = perf_session__findnew(s, data.tid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                        self->header.type);
                return -1;
        }
 
-       process_raw_event(data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
 
        return 0;
 }
@@ -862,7 +858,7 @@ static struct perf_event_ops eops = {
 
 static int read_events(void)
 {
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
        if (!session)
                die("Initializing perf session failed\n");
 
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
                                usage_with_options(report_usage, report_options);
                }
                __cmd_report();
-       } else if (!strcmp(argv[0], "trace")) {
-               /* Aliased to 'perf trace' */
-               return cmd_trace(argc, argv, prefix);
+       } else if (!strcmp(argv[0], "script")) {
+               /* Aliased to 'perf script' */
+               return cmd_script(argc, argv, prefix);
        } else if (!strcmp(argv[0], "info")) {
                if (argc) {
                        argc = parse_options(argc, argv,
index 2e000c068cc5a377d87923bb302a383abafd3a33..add163c9f0e7d4db3fe01afe704a72c5a923597b 100644 (file)
@@ -249,6 +249,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
             !params.show_lines))
                usage_with_options(probe_usage, options);
 
+       /*
+        * Fall back to the default vmlinux search paths only when the user
+        * has not specified a kernel image explicitly.
+        */
+       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+
        if (params.list_events) {
                if (params.mod_events) {
                        pr_err("  Error: Don't use --list with --add/--del.\n");
index 564491fa18b27838dd79125954bc744f51f7fe2c..7bc0490354847a949d2f2105cca86e67f8e0b1b4 100644 (file)
@@ -18,6 +18,7 @@
 
 #include "util/header.h"
 #include "util/event.h"
+#include "util/evsel.h"
 #include "util/debug.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include <sched.h>
 #include <sys/mman.h>
 
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
 enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
 };
 
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
-
 static u64                     user_interval                   = ULLONG_MAX;
 static u64                     default_interval                =      0;
+static u64                     sample_type;
 
-static int                     nr_cpus                         =      0;
+static struct cpu_map          *cpus;
 static unsigned int            page_size;
 static unsigned int            mmap_pages                      =    128;
 static unsigned int            user_freq                       = UINT_MAX;
@@ -48,11 +50,11 @@ static const char           *output_name                    = "perf.data";
 static int                     group                           =      0;
 static int                     realtime_prio                   =      0;
 static bool                    raw_samples                     =  false;
+static bool                    sample_id_all_avail             =   true;
 static bool                    system_wide                     =  false;
 static pid_t                   target_pid                      =     -1;
 static pid_t                   target_tid                      =     -1;
-static pid_t                   *all_tids                       =      NULL;
-static int                     thread_num                      =      0;
+static struct thread_map       *threads;
 static pid_t                   child_pid                       =     -1;
 static bool                    no_inherit                      =  false;
 static enum write_mode_t       write_mode                      = WRITE_FORCE;
@@ -60,7 +62,9 @@ static bool                   call_graph                      =  false;
 static bool                    inherit_stat                    =  false;
 static bool                    no_samples                      =  false;
 static bool                    sample_address                  =  false;
+static bool                    sample_time                     =  false;
 static bool                    no_buildid                      =  false;
+static bool                    no_buildid_cache                =  false;
 
 static long                    samples                         =      0;
 static u64                     bytes_written                   =      0;
@@ -77,7 +81,6 @@ static struct perf_session    *session;
 static const char              *cpu_list;
 
 struct mmap_data {
-       int                     counter;
        void                    *base;
        unsigned int            mask;
        unsigned int            prev;
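
The FD() macro replaces the fixed fd[MAX_NR_CPUS][MAX_COUNTERS] tables with per-evsel storage allocated at runtime by perf_evsel__alloc_fd(). The backing xyarray (added as util/xyarray.[ch] by this series) is essentially a flat buffer plus row/column arithmetic; a standalone approximation, not the in-tree source:

    #include <stdio.h>
    #include <stdlib.h>

    struct xyarray {
            size_t row_size;        /* bytes per row (x stride) */
            size_t entry_size;      /* bytes per entry (y stride) */
            char contents[];
    };

    static struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
    {
            struct xyarray *xy = calloc(1, sizeof(*xy) +
                                        (size_t)xlen * ylen * entry_size);

            if (xy) {
                    xy->row_size = ylen * entry_size;
                    xy->entry_size = entry_size;
            }
            return xy;
    }

    static void *xyarray__entry(struct xyarray *xy, int x, int y)
    {
            return &xy->contents[x * xy->row_size + y * xy->entry_size];
    }

    /* the same accessor idiom as builtin-record's FD(evsel, cpu, thread) */
    #define FD(xy, x, y) (*(int *)xyarray__entry(xy, x, y))

    int main(void)
    {
            struct xyarray *fds = xyarray__new(2, 3, sizeof(int));

            if (!fds)
                    return 1;
            FD(fds, 1, 2) = 42;
            printf("%d\n", FD(fds, 1, 2));
            free(fds);
            return 0;
    }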
@@ -128,6 +131,7 @@ static void write_output(void *buf, size_t size)
 }
 
 static int process_synthesized_event(event_t *event,
+                                    struct sample_data *sample __used,
                                     struct perf_session *self __used)
 {
        write_output(event, event->header.size);
@@ -224,12 +228,12 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
        return h_attr;
 }
 
-static void create_counter(int counter, int cpu)
+static void create_counter(struct perf_evsel *evsel, int cpu)
 {
-       char *filter = filters[counter];
-       struct perf_event_attr *attr = attrs + counter;
+       char *filter = evsel->filter;
+       struct perf_event_attr *attr = &evsel->attr;
        struct perf_header_attr *h_attr;
-       int track = !counter; /* only the first counter needs these */
+       int track = !evsel->idx; /* only the first counter needs these */
        int thread_index;
        int ret;
        struct {
@@ -238,6 +242,19 @@ static void create_counter(int counter, int cpu)
                u64 time_running;
                u64 id;
        } read_data;
+       /*
+        * Check if parse_single_tracepoint_event has already asked for
+        * PERF_SAMPLE_TIME.
+        *
+        * XXX this is kludgy, but it is a short-term fix for problems
+        * introduced by eac23d1c, which broke 'perf script' by producing
+        * different sample_types for multiple tracepoint events when a perf
+        * binary that tries to use sample_id_all runs on an older kernel.
+        *
+        * We need to move counter creation to perf_session, support
+        * different sample_types, etc.
+        */
+       bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +297,10 @@ static void create_counter(int counter, int cpu)
        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;
 
+       if (sample_id_all_avail &&
+           (sample_time || system_wide || !no_inherit || cpu_list))
+               attr->sample_type       |= PERF_SAMPLE_TIME;
+
        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
@@ -293,13 +314,14 @@ static void create_counter(int counter, int cpu)
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
+retry_sample_id:
+       attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 
-       for (thread_index = 0; thread_index < thread_num; thread_index++) {
+       for (thread_index = 0; thread_index < threads->nr; thread_index++) {
 try_again:
-               fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr,
-                               all_tids[thread_index], cpu, group_fd, 0);
+               FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
 
-               if (fd[nr_cpu][counter][thread_index] < 0) {
+               if (FD(evsel, nr_cpu, thread_index) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES)
@@ -309,6 +331,15 @@ try_again:
                        else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
+                       } else if (err == EINVAL && sample_id_all_avail) {
+                               /*
+                                * Old kernel, no attr->sample_id_all field
+                                */
+                               sample_id_all_avail = false;
+                               if (!sample_time && !raw_samples && !time_needed)
+                                       attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+                               goto retry_sample_id;
                        }
 
                        /*
@@ -326,8 +357,8 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
-                                       fd[nr_cpu][counter][thread_index], strerror(err));
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
+                             FD(evsel, nr_cpu, thread_index), strerror(err));
 
 #if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -341,7 +372,7 @@ try_again:
                        exit(-1);
                }
 
-               h_attr = get_header_attr(attr, counter);
+               h_attr = get_header_attr(attr, evsel->idx);
                if (h_attr == NULL)
                        die("nomem\n");
 
@@ -352,7 +383,7 @@ try_again:
                        }
                }
 
-               if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
+               if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
                        perror("Unable to read perf file descriptor");
                        exit(-1);
                }
@@ -362,43 +393,44 @@ try_again:
                        exit(-1);
                }
 
-               assert(fd[nr_cpu][counter][thread_index] >= 0);
-               fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK);
+               assert(FD(evsel, nr_cpu, thread_index) >= 0);
+               fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
 
                /*
                 * First counter acts as the group leader:
                 */
                if (group && group_fd == -1)
-                       group_fd = fd[nr_cpu][counter][thread_index];
-
-               if (counter || thread_index) {
-                       ret = ioctl(fd[nr_cpu][counter][thread_index],
-                                       PERF_EVENT_IOC_SET_OUTPUT,
-                                       fd[nr_cpu][0][0]);
+                       group_fd = FD(evsel, nr_cpu, thread_index);
+
+               if (evsel->idx || thread_index) {
+                       struct perf_evsel *first;
+                       first = list_entry(evsel_list.next, struct perf_evsel, node);
+                       ret = ioctl(FD(evsel, nr_cpu, thread_index),
+                                   PERF_EVENT_IOC_SET_OUTPUT,
+                                   FD(first, nr_cpu, 0));
                        if (ret) {
                                error("failed to set output: %d (%s)\n", errno,
                                                strerror(errno));
                                exit(-1);
                        }
                } else {
-                       mmap_array[nr_cpu].counter = counter;
                        mmap_array[nr_cpu].prev = 0;
                        mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
                        mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
-                               PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+                               PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
                        if (mmap_array[nr_cpu].base == MAP_FAILED) {
                                error("failed to mmap with %d (%s)\n", errno, strerror(errno));
                                exit(-1);
                        }
 
-                       event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+                       event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
                        event_array[nr_poll].events = POLLIN;
                        nr_poll++;
                }
 
                if (filter != NULL) {
-                       ret = ioctl(fd[nr_cpu][counter][thread_index],
-                                       PERF_EVENT_IOC_SET_FILTER, filter);
+                       ret = ioctl(FD(evsel, nr_cpu, thread_index),
+                                   PERF_EVENT_IOC_SET_FILTER, filter);
                        if (ret) {
                                error("failed to set filter with %d (%s)\n", errno,
                                                strerror(errno));
@@ -406,15 +438,19 @@ try_again:
                        }
                }
        }
+
+       if (!sample_type)
+               sample_type = attr->sample_type;
 }
 
 static void open_counters(int cpu)
 {
-       int counter;
+       struct perf_evsel *pos;
 
        group_fd = -1;
-       for (counter = 0; counter < nr_counters; counter++)
-               create_counter(counter, cpu);
+
+       list_for_each_entry(pos, &evsel_list, node)
+               create_counter(pos, cpu);
 
        nr_cpu++;
 }
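
The retry_sample_id path above is a runtime feature probe: request attr->sample_id_all, and if the running kernel predates the field it rejects the open with EINVAL, so the flag (and PERF_SAMPLE_TIME, when nothing else asked for it) is cleared and the open retried. The same probe in a standalone program via the raw syscall (assumes kernel headers new enough to declare sample_id_all; a software event is used so no hardware PMU is required):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    /* open a counter, falling back when sample_id_all is rejected */
    static int open_counter(struct perf_event_attr *attr)
    {
            int fd;

            attr->sample_id_all = 1;
    retry:
            fd = syscall(__NR_perf_event_open, attr, 0, -1, -1, 0);
            if (fd < 0 && errno == EINVAL && attr->sample_id_all) {
                    attr->sample_id_all = 0;    /* old kernel: retry without */
                    goto retry;
            }
            return fd;
    }

    int main(void)
    {
            struct perf_event_attr attr;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_TASK_CLOCK;

            fd = open_counter(&attr);
            printf("fd=%d sample_id_all=%d\n", fd, (int)attr.sample_id_all);
            if (fd >= 0)
                    close(fd);
            return 0;
    }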
@@ -437,7 +473,8 @@ static void atexit_header(void)
        if (!pipe_output) {
                session->header.data_size += bytes_written;
 
-               process_buildids();
+               if (!no_buildid)
+                       process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
                symbol__exit();
@@ -500,7 +537,7 @@ static void mmap_read_all(void)
 
 static int __cmd_record(int argc, const char **argv)
 {
-       int i, counter;
+       int i;
        struct stat st;
        int flags;
        int err;
@@ -552,19 +589,22 @@ static int __cmd_record(int argc, const char **argv)
        }
 
        session = perf_session__new(output_name, O_WRONLY,
-                                   write_mode == WRITE_FORCE, false);
+                                   write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }
 
+       if (!no_buildid)
+               perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+
        if (!file_new) {
                err = perf_header__read(session, output);
                if (err < 0)
                        goto out_delete_session;
        }
 
-       if (have_tracepoints(attrs, nr_counters))
+       if (have_tracepoints(&evsel_list))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 
        /*
@@ -612,7 +652,7 @@ static int __cmd_record(int argc, const char **argv)
                }
 
                if (!system_wide && target_tid == -1 && target_pid == -1)
-                       all_tids[0] = child_pid;
+                       threads->map[0] = child_pid;
 
                close(child_ready_pipe[1]);
                close(go_pipe[0]);
@@ -626,19 +666,15 @@ static int __cmd_record(int argc, const char **argv)
                close(child_ready_pipe[0]);
        }
 
-       nr_cpus = read_cpu_map(cpu_list);
-       if (nr_cpus < 1) {
-               perror("failed to collect number of CPUs");
-               return -1;
-       }
-
        if (!system_wide && no_inherit && !cpu_list) {
                open_counters(-1);
        } else {
-               for (i = 0; i < nr_cpus; i++)
-                       open_counters(cpumap[i]);
+               for (i = 0; i < cpus->nr; i++)
+                       open_counters(cpus->map[i]);
        }
 
+       perf_session__set_sample_type(session, sample_type);
+
        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
@@ -651,6 +687,8 @@ static int __cmd_record(int argc, const char **argv)
 
        post_processing_offset = lseek(output, 0, SEEK_CUR);
 
+       perf_session__set_sample_id_all(session, sample_id_all_avail);
+
        if (pipe_output) {
                err = event__synthesize_attrs(&session->header,
                                              process_synthesized_event,
@@ -667,7 +705,7 @@ static int __cmd_record(int argc, const char **argv)
                        return err;
                }
 
-               if (have_tracepoints(attrs, nr_counters)) {
+               if (have_tracepoints(&evsel_list)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
@@ -676,8 +714,7 @@ static int __cmd_record(int argc, const char **argv)
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
-                       err = event__synthesize_tracing_data(output, attrs,
-                                                            nr_counters,
+                       err = event__synthesize_tracing_data(output, &evsel_list,
                                                             process_synthesized_event,
                                                             session);
                        if (err <= 0) {
@@ -751,13 +788,13 @@ static int __cmd_record(int argc, const char **argv)
 
                if (done) {
                        for (i = 0; i < nr_cpu; i++) {
-                               for (counter = 0;
-                                       counter < nr_counters;
-                                       counter++) {
+                               struct perf_evsel *pos;
+
+                               list_for_each_entry(pos, &evsel_list, node) {
                                        for (thread = 0;
-                                               thread < thread_num;
+                                               thread < threads->nr;
                                                thread++)
-                                               ioctl(fd[i][counter][thread],
+                                               ioctl(FD(pos, i, thread),
                                                        PERF_EVENT_IOC_DISABLE);
                                }
                        }
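
On shutdown the record loop above disables every counter with PERF_EVENT_IOC_DISABLE before draining the mmap buffers one last time, so no new events race with the final read. The enable/disable ioctls in isolation (standalone; a software clock event so it works without a hardware PMU):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count = 0;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_TASK_CLOCK;
            attr.disabled = 1;                      /* start stopped */

            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0)
                    return 1;

            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);    /* start counting */
            usleep(1000);                           /* measured region */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);   /* stop, as above */

            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("task clock: %lld ns\n", count);
            close(fd);
            return 0;
    }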
@@ -831,16 +868,20 @@ const struct option record_options[] = {
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
+       OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
-       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
+       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
+       OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+                   "do not collect buildids in perf.data"),
        OPT_END()
 };
 
 int cmd_record(int argc, const char **argv, const char *prefix __used)
 {
-       int i, j, err = -ENOMEM;
+       int err = -ENOMEM;
+       struct perf_evsel *pos;
 
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
@@ -859,41 +900,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
        }
 
        symbol__init();
-       if (no_buildid)
+
+       if (no_buildid_cache || no_buildid)
                disable_buildid_cache();
 
-       if (!nr_counters) {
-               nr_counters     = 1;
-               attrs[0].type   = PERF_TYPE_HARDWARE;
-               attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+       if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
+               pr_err("Not enough memory for event selector list\n");
+               goto out_symbol_exit;
        }
 
-       if (target_pid != -1) {
+       if (target_pid != -1)
                target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                                       target_pid);
-                       usage_with_options(record_usage, record_options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       goto out_symbol_exit;
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problem finding threads to monitor\n");
+               usage_with_options(record_usage, record_options);
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       if (!fd[i][j])
-                               goto out_free_fd;
-               }
+       cpus = cpu_map__new(cpu_list);
+       if (cpus == NULL) {
+               perror("failed to parse CPU map");
+               return -1;
        }
-       event_array = malloc(
-               sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
+
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
+       }
+       event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
+                             MAX_COUNTERS * threads->nr));
        if (!event_array)
                goto out_free_fd;
 
@@ -920,12 +956,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 out_free_event_array:
        free(event_array);
 out_free_fd:
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++)
-                       free(fd[i][j]);
-       }
-       free(all_tids);
-       all_tids = NULL;
+       thread_map__delete(threads);
+       threads = NULL;
 out_symbol_exit:
        symbol__exit();
        return err;
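
cmd_record's error handling keeps the kernel convention visible above: cleanup labels appear in reverse order of acquisition, and each failure jumps to the label that releases everything acquired so far. The skeleton of the pattern (standalone, with hypothetical resources):

    #include <stdlib.h>

    static int setup(void)
    {
            int err = -1;   /* assume failure until everything succeeds */
            char *a, *b;

            a = malloc(64);
            if (a == NULL)
                    goto out;
            b = malloc(64);
            if (b == NULL)
                    goto out_free_a;

            /* ... the actual work would run here ... */
            err = 0;

            free(b);
    out_free_a:
            free(a);
    out:
            return err;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }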
index 5de405d452300318541338293563d8ebc41ccb87..75183a4518e60d23db05e36e585fc178df731a19 100644 (file)
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
        return 0;
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data = { .period = 1, };
        struct addr_location al;
        struct perf_event_attr *attr;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
 
-       if (perf_session__add_hist_entry(session, &al, &data)) {
+       if (perf_session__add_hist_entry(session, &al, sample)) {
                pr_debug("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       attr = perf_header__find_attr(data.id, &session->header);
+       attr = perf_header__find_attr(sample->id, &session->header);
 
-       if (add_event_total(session, &data, attr)) {
+       if (add_event_total(session, sample, attr)) {
                pr_debug("problem adding event period\n");
                return -1;
        }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int process_read_event(event_t *event, struct perf_session *session __used)
+static int process_read_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        struct perf_event_attr *attr;
 
@@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = {
        .event_type = event__process_event_type,
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 extern volatile int session_done;
@@ -307,7 +310,7 @@ static int __cmd_report(void)
 
        signal(SIGINT, sig_handler);
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -442,6 +445,8 @@ static const struct option options[] = {
                    "dump raw trace in ASCII"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
+       OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+                  "file", "kallsyms pathname"),
        OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
        OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@ static const struct option options[] = {
                   "columns '.' is reserved."),
        OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
                    "Only display entries resolved to a symbol"),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index 55f3b5dcc731417198a2e5fd29ac8eefd96e1a5e..7a4ebeb8b016b4ca01c14f393b3c5fab98567523 100644 (file)
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
                process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct thread *thread;
 
        if (!(session->sample_type & PERF_SAMPLE_RAW))
                return 0;
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = -1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, data.pid);
+       thread = perf_session__findnew(session, sample->pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
+       if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
                return 0;
 
-       process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(event, session, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
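
The boilerplate deleted above (a memset plus event__parse_sample() call in
every tool) moves into the session layer: conceptually, the dispatcher now
does something like the following before invoking each tool's .sample
handler (a sketch of the idea, not the literal util/session.c code):

    static int dispatch_sample(event_t *event, struct perf_session *session,
                               struct perf_event_ops *ops)
    {
            struct sample_data sample;

            /* parsed once, centrally, instead of in every builtin */
            event__parse_sample(event, session->sample_type, &sample);

            return ops->sample(event, &sample, session);
    }
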
@@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = {
 static int read_events(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
                usage_with_options(sched_usage, sched_options);
 
        /*
-        * Aliased to 'perf trace' for now:
+        * Aliased to 'perf script' for now:
         */
-       if (!strcmp(argv[0], "trace"))
-               return cmd_trace(argc, argv, prefix);
+       if (!strcmp(argv[0], "script"))
+               return cmd_script(argc, argv, prefix);
 
        symbol__init();
        if (!strncmp(argv[0], "rec", 3)) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
new file mode 100644 (file)
index 0000000..150a606
--- /dev/null
@@ -0,0 +1,821 @@
+#include "builtin.h"
+
+#include "perf.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include "util/exec_cmd.h"
+#include "util/header.h"
+#include "util/parse-options.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/trace-event.h"
+#include "util/parse-options.h"
+#include "util/util.h"
+
+static char const              *script_name;
+static char const              *generate_script_lang;
+static bool                    debug_mode;
+static u64                     last_timestamp;
+static u64                     nr_unordered;
+extern const struct option     record_options[];
+
+static int default_start_script(const char *script __unused,
+                               int argc __unused,
+                               const char **argv __unused)
+{
+       return 0;
+}
+
+static int default_stop_script(void)
+{
+       return 0;
+}
+
+static int default_generate_script(const char *outfile __unused)
+{
+       return 0;
+}
+
+static struct scripting_ops default_scripting_ops = {
+       .start_script           = default_start_script,
+       .stop_script            = default_stop_script,
+       .process_event          = print_event,
+       .generate_script        = default_generate_script,
+};
+
+static struct scripting_ops    *scripting_ops;
+
+static void setup_scripting(void)
+{
+       setup_perl_scripting();
+       setup_python_scripting();
+
+       scripting_ops = &default_scripting_ops;
+}
+
+static int cleanup_scripting(void)
+{
+       pr_debug("\nperf script stopped\n");
+
+       return scripting_ops->stop_script();
+}
+
+static char const              *input_name = "perf.data";
+
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
+{
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
+
+       if (thread == NULL) {
+               pr_debug("problem processing %d event, skipping it.\n",
+                        event->header.type);
+               return -1;
+       }
+
+       if (session->sample_type & PERF_SAMPLE_RAW) {
+               if (debug_mode) {
+                       if (sample->time < last_timestamp) {
+                               pr_err("Samples misordered, previous: %llu "
+                                       "this: %llu\n", last_timestamp,
+                                       sample->time);
+                               nr_unordered++;
+                       }
+                       last_timestamp = sample->time;
+                       return 0;
+               }
+               /*
+                * FIXME: better to resolve the pid from the struct
+                * trace_entry field, though it should match this perf
+                * event's pid.
+                */
+               scripting_ops->process_event(sample->cpu, sample->raw_data,
+                                            sample->raw_size,
+                                            sample->time, thread->comm);
+       }
+
+       session->hists.stats.total_period += sample->period;
+       return 0;
+}
+
+static struct perf_event_ops event_ops = {
+       .sample = process_sample_event,
+       .comm   = event__process_comm,
+       .attr   = event__process_attr,
+       .event_type = event__process_event_type,
+       .tracing_data = event__process_tracing_data,
+       .build_id = event__process_build_id,
+       .ordering_requires_timestamps = true,
+       .ordered_samples = true,
+};
+
+extern volatile int session_done;
+
+static void sig_handler(int sig __unused)
+{
+       session_done = 1;
+}
+
+static int __cmd_script(struct perf_session *session)
+{
+       int ret;
+
+       signal(SIGINT, sig_handler);
+
+       ret = perf_session__process_events(session, &event_ops);
+
+       if (debug_mode)
+               pr_err("Misordered timestamps: %llu\n", nr_unordered);
+
+       return ret;
+}
+
+struct script_spec {
+       struct list_head        node;
+       struct scripting_ops    *ops;
+       char                    spec[0];
+};
+
+static LIST_HEAD(script_specs);
+
+static struct script_spec *script_spec__new(const char *spec,
+                                           struct scripting_ops *ops)
+{
+       struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+       if (s != NULL) {
+               strcpy(s->spec, spec);
+               s->ops = ops;
+       }
+
+       return s;
+}
+
+static void script_spec__delete(struct script_spec *s)
+{
+       /* spec[] is allocated inline with the struct, so one free covers both */
+       free(s);
+}
+
+static void script_spec__add(struct script_spec *s)
+{
+       list_add_tail(&s->node, &script_specs);
+}
+
+static struct script_spec *script_spec__find(const char *spec)
+{
+       struct script_spec *s;
+
+       list_for_each_entry(s, &script_specs, node)
+               if (strcasecmp(s->spec, spec) == 0)
+                       return s;
+       return NULL;
+}
+
+static struct script_spec *script_spec__findnew(const char *spec,
+                                               struct scripting_ops *ops)
+{
+       struct script_spec *s = script_spec__find(spec);
+
+       if (s)
+               return s;
+
+       s = script_spec__new(spec, ops);
+       if (!s)
+               goto out_delete_spec;
+
+       script_spec__add(s);
+
+       return s;
+
+out_delete_spec:
+       script_spec__delete(s);
+
+       return NULL;
+}
+
+int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+       struct script_spec *s;
+
+       s = script_spec__find(spec);
+       if (s)
+               return -1;
+
+       s = script_spec__findnew(spec, ops);
+       if (!s)
+               return -1;
+
+       return 0;
+}
+
+static struct scripting_ops *script_spec__lookup(const char *spec)
+{
+       struct script_spec *s = script_spec__find(spec);
+       if (!s)
+               return NULL;
+
+       return s->ops;
+}
+
+static void list_available_languages(void)
+{
+       struct script_spec *s;
+
+       fprintf(stderr, "\n");
+       fprintf(stderr, "Scripting language extensions (used in "
+               "perf script -s [spec:]script.[spec]):\n\n");
+
+       list_for_each_entry(s, &script_specs, node)
+               fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
+
+       fprintf(stderr, "\n");
+}
+
+static int parse_scriptname(const struct option *opt __used,
+                           const char *str, int unset __used)
+{
+       char spec[PATH_MAX];
+       const char *script, *ext;
+       int len;
+
+       if (strcmp(str, "lang") == 0) {
+               list_available_languages();
+               exit(0);
+       }
+
+       script = strchr(str, ':');
+       if (script) {
+               len = script - str;
+               if (len >= PATH_MAX) {
+                       fprintf(stderr, "invalid language specifier");
+                       return -1;
+               }
+               strncpy(spec, str, len);
+               spec[len] = '\0';
+               scripting_ops = script_spec__lookup(spec);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid language specifier");
+                       return -1;
+               }
+               script++;
+       } else {
+               script = str;
+               ext = strrchr(script, '.');
+               if (!ext) {
+                       fprintf(stderr, "invalid script extension");
+                       return -1;
+               }
+               scripting_ops = script_spec__lookup(++ext);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid script extension");
+                       return -1;
+               }
+       }
+
+       script_name = strdup(script);
+
+       return 0;
+}
+
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+static int is_directory(const char *base_path, const struct dirent *dent)
+{
+       char path[PATH_MAX];
+       struct stat st;
+
+       snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+       if (stat(path, &st))
+               return 0;
+
+       return S_ISDIR(st.st_mode);
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
+       while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&     \
+              lang_next)                                               \
+               if ((lang_dirent.d_type == DT_DIR ||                    \
+                    (lang_dirent.d_type == DT_UNKNOWN &&               \
+                     is_directory(scripts_path, &lang_dirent))) &&     \
+                   (strcmp(lang_dirent.d_name, ".")) &&                \
+                   (strcmp(lang_dirent.d_name, "..")))
+
+#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
+       while (!readdir_r(lang_dir, &script_dirent, &script_next) &&    \
+              script_next)                                             \
+               if (script_dirent.d_type != DT_DIR &&                   \
+                   (script_dirent.d_type != DT_UNKNOWN ||              \
+                    !is_directory(lang_path, &script_dirent)))
+
+
+#define RECORD_SUFFIX                  "-record"
+#define REPORT_SUFFIX                  "-report"
+
+struct script_desc {
+       struct list_head        node;
+       char                    *name;
+       char                    *half_liner;
+       char                    *args;
+};
+
+static LIST_HEAD(script_descs);
+
+static struct script_desc *script_desc__new(const char *name)
+{
+       struct script_desc *s = zalloc(sizeof(*s));
+
+       if (s != NULL && name)
+               s->name = strdup(name);
+
+       return s;
+}
+
+static void script_desc__delete(struct script_desc *s)
+{
+       free(s->name);
+       free(s->half_liner);
+       free(s->args);
+       free(s);
+}
+
+static void script_desc__add(struct script_desc *s)
+{
+       list_add_tail(&s->node, &script_descs);
+}
+
+static struct script_desc *script_desc__find(const char *name)
+{
+       struct script_desc *s;
+
+       list_for_each_entry(s, &script_descs, node)
+               if (strcasecmp(s->name, name) == 0)
+                       return s;
+       return NULL;
+}
+
+static struct script_desc *script_desc__findnew(const char *name)
+{
+       struct script_desc *s = script_desc__find(name);
+
+       if (s)
+               return s;
+
+       s = script_desc__new(name);
+       if (!s)
+               goto out_delete_desc;
+
+       script_desc__add(s);
+
+       return s;
+
+out_delete_desc:
+       script_desc__delete(s);
+
+       return NULL;
+}
+
+static const char *ends_with(const char *str, const char *suffix)
+{
+       size_t suffix_len = strlen(suffix);
+       const char *p = str;
+
+       if (strlen(str) > suffix_len) {
+               p = str + strlen(str) - suffix_len;
+               if (!strncmp(p, suffix, suffix_len))
+                       return p;
+       }
+
+       return NULL;
+}
+
+static char *ltrim(char *str)
+{
+       int len = strlen(str);
+
+       while (len && isspace(*str)) {
+               len--;
+               str++;
+       }
+
+       return str;
+}
+
+static int read_script_info(struct script_desc *desc, const char *filename)
+{
+       char line[BUFSIZ], *p;
+       FILE *fp;
+
+       fp = fopen(filename, "r");
+       if (!fp)
+               return -1;
+
+       while (fgets(line, sizeof(line), fp)) {
+               p = ltrim(line);
+               if (strlen(p) == 0)
+                       continue;
+               if (*p != '#')
+                       continue;
+               p++;
+               if (strlen(p) && *p == '!')
+                       continue;
+
+               p = ltrim(p);
+               if (strlen(p) && p[strlen(p) - 1] == '\n')
+                       p[strlen(p) - 1] = '\0';
+
+               if (!strncmp(p, "description:", strlen("description:"))) {
+                       p += strlen("description:");
+                       desc->half_liner = strdup(ltrim(p));
+                       continue;
+               }
+
+               if (!strncmp(p, "args:", strlen("args:"))) {
+                       p += strlen("args:");
+                       desc->args = strdup(ltrim(p));
+                       continue;
+               }
+       }
+
+       fclose(fp);
+
+       return 0;
+}
+
+static int list_available_scripts(const struct option *opt __used,
+                                 const char *s __used, int unset __used)
+{
+       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+       char scripts_path[MAXPATHLEN];
+       DIR *scripts_dir, *lang_dir;
+       char script_path[MAXPATHLEN];
+       char lang_path[MAXPATHLEN];
+       struct script_desc *desc;
+       char first_half[BUFSIZ];
+       char *script_root;
+       char *str;
+
+       snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+
+       scripts_dir = opendir(scripts_path);
+       if (!scripts_dir)
+               return -1;
+
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+               snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+                        lang_dirent.d_name);
+               lang_dir = opendir(lang_path);
+               if (!lang_dir)
+                       continue;
+
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
+                       script_root = strdup(script_dirent.d_name);
+                       str = (char *)ends_with(script_root, REPORT_SUFFIX);
+                       if (str) {
+                               *str = '\0';
+                               desc = script_desc__findnew(script_root);
+                               snprintf(script_path, MAXPATHLEN, "%s/%s",
+                                        lang_path, script_dirent.d_name);
+                               read_script_info(desc, script_path);
+                       }
+                       free(script_root);
+               }
+       }
+
+       fprintf(stdout, "List of available trace scripts:\n");
+       list_for_each_entry(desc, &script_descs, node) {
+               sprintf(first_half, "%s %s", desc->name,
+                       desc->args ? desc->args : "");
+               fprintf(stdout, "  %-36s %s\n", first_half,
+                       desc->half_liner ? desc->half_liner : "");
+       }
+
+       exit(0);
+}
+
+static char *get_script_path(const char *script_root, const char *suffix)
+{
+       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+       char scripts_path[MAXPATHLEN];
+       char script_path[MAXPATHLEN];
+       DIR *scripts_dir, *lang_dir;
+       char lang_path[MAXPATHLEN];
+       char *str, *__script_root;
+       char *path = NULL;
+
+       snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+
+       scripts_dir = opendir(scripts_path);
+       if (!scripts_dir)
+               return NULL;
+
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+               snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+                        lang_dirent.d_name);
+               lang_dir = opendir(lang_path);
+               if (!lang_dir)
+                       continue;
+
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
+                       __script_root = strdup(script_dirent.d_name);
+                       str = (char *)ends_with(__script_root, suffix);
+                       if (str) {
+                               *str = '\0';
+                               if (strcmp(__script_root, script_root)) {
+                                       free(__script_root);
+                                       continue;
+                               }
+                               snprintf(script_path, MAXPATHLEN, "%s/%s",
+                                        lang_path, script_dirent.d_name);
+                               path = strdup(script_path);
+                               free(__script_root);
+                               break;
+                       }
+                       free(__script_root);
+               }
+       }
+
+       return path;
+}
+
+static bool is_top_script(const char *script_path)
+{
+       return ends_with(script_path, "top") != NULL;
+}
+
+static int has_required_arg(char *script_path)
+{
+       struct script_desc *desc;
+       int n_args = 0;
+       char *p;
+
+       desc = script_desc__new(NULL);
+
+       if (read_script_info(desc, script_path))
+               goto out;
+
+       if (!desc->args)
+               goto out;
+
+       for (p = desc->args; *p; p++)
+               if (*p == '<')
+                       n_args++;
+out:
+       script_desc__delete(desc);
+
+       return n_args;
+}
+
+static const char * const script_usage[] = {
+       "perf script [<options>]",
+       "perf script [<options>] record <script> [<record-options>] <command>",
+       "perf script [<options>] report <script> [script-args]",
+       "perf script [<options>] <script> [<record-options>] <command>",
+       "perf script [<options>] <top-script> [script-args]",
+       NULL
+};
+
+static const struct option options[] = {
+       OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+                   "dump raw trace in ASCII"),
+       OPT_INCR('v', "verbose", &verbose,
+                   "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('L', "Latency", &latency_format,
+                   "show latency attributes (irqs/preemption disabled, etc)"),
+       OPT_CALLBACK_NOOPT('l', "list", NULL, NULL, "list available scripts",
+                          list_available_scripts),
+       OPT_CALLBACK('s', "script", NULL, "name",
+                    "script file name (lang:script name, script name, or *)",
+                    parse_scriptname),
+       OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
+                  "generate perf-script.xx script in specified language"),
+       OPT_STRING('i', "input", &input_name, "file",
+                   "input file name"),
+       OPT_BOOLEAN('d', "debug-mode", &debug_mode,
+                  "do various checks like samples ordering and lost events"),
+       OPT_END()
+};
+
+static bool have_cmd(int argc, const char **argv)
+{
+       char **__argv = malloc(sizeof(const char *) * argc);
+
+       if (!__argv)
+               die("malloc");
+       memcpy(__argv, argv, sizeof(const char *) * argc);
+       argc = parse_options(argc, (const char **)__argv, record_options,
+                            NULL, PARSE_OPT_STOP_AT_NON_OPTION);
+       free(__argv);
+
+       return argc != 0;
+}
+
+int cmd_script(int argc, const char **argv, const char *prefix __used)
+{
+       char *rec_script_path = NULL;
+       char *rep_script_path = NULL;
+       struct perf_session *session;
+       char *script_path = NULL;
+       const char **__argv;
+       bool system_wide;
+       int i, j, err;
+
+       setup_scripting();
+
+       argc = parse_options(argc, argv, options, script_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
+               rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
+               if (!rec_script_path)
+                       return cmd_record(argc, argv, NULL);
+       }
+
+       if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) {
+               rep_script_path = get_script_path(argv[1], REPORT_SUFFIX);
+               if (!rep_script_path) {
+                       fprintf(stderr,
+                               "Please specify a valid report script"
+                               "(see 'perf script -l' for listing)\n");
+                       return -1;
+               }
+       }
+
+       /* make sure PERF_EXEC_PATH is set for scripts */
+       perf_set_argv_exec_path(perf_exec_path());
+
+       if (argc && !script_name && !rec_script_path && !rep_script_path) {
+               int live_pipe[2];
+               int rep_args;
+               pid_t pid;
+
+               rec_script_path = get_script_path(argv[0], RECORD_SUFFIX);
+               rep_script_path = get_script_path(argv[0], REPORT_SUFFIX);
+
+               if (!rec_script_path && !rep_script_path) {
+                       fprintf(stderr, " Couldn't find script %s\n\n See perf"
+                               " script -l for available scripts.\n", argv[0]);
+                       usage_with_options(script_usage, options);
+               }
+
+               if (is_top_script(argv[0])) {
+                       rep_args = argc - 1;
+               } else {
+                       int rec_args;
+
+                       rep_args = has_required_arg(rep_script_path);
+                       rec_args = (argc - 1) - rep_args;
+                       if (rec_args < 0) {
+                               fprintf(stderr, " %s script requires options."
+                                       "\n\n See perf script -l for available "
+                                       "scripts and options.\n", argv[0]);
+                               usage_with_options(script_usage, options);
+                       }
+               }
+
+               if (pipe(live_pipe) < 0) {
+                       perror("failed to create pipe");
+                       exit(-1);
+               }
+
+               pid = fork();
+               if (pid < 0) {
+                       perror("failed to fork");
+                       exit(-1);
+               }
+
+               if (!pid) {
+                       system_wide = true;
+                       j = 0;
+
+                       dup2(live_pipe[1], 1);
+                       close(live_pipe[0]);
+
+                       if (!is_top_script(argv[0]))
+                               system_wide = !have_cmd(argc - rep_args,
+                                                       &argv[rep_args]);
+
+                       __argv = malloc((argc + 6) * sizeof(const char *));
+                       if (!__argv)
+                               die("malloc");
+
+                       __argv[j++] = "/bin/sh";
+                       __argv[j++] = rec_script_path;
+                       if (system_wide)
+                               __argv[j++] = "-a";
+                       __argv[j++] = "-q";
+                       __argv[j++] = "-o";
+                       __argv[j++] = "-";
+                       for (i = rep_args + 1; i < argc; i++)
+                               __argv[j++] = argv[i];
+                       __argv[j++] = NULL;
+
+                       execvp("/bin/sh", (char **)__argv);
+                       free(__argv);
+                       exit(-1);
+               }
+
+               dup2(live_pipe[0], 0);
+               close(live_pipe[1]);
+
+               __argv = malloc((argc + 4) * sizeof(const char *));
+               if (!__argv)
+                       die("malloc");
+               j = 0;
+               __argv[j++] = "/bin/sh";
+               __argv[j++] = rep_script_path;
+               for (i = 1; i < rep_args + 1; i++)
+                       __argv[j++] = argv[i];
+               __argv[j++] = "-i";
+               __argv[j++] = "-";
+               __argv[j++] = NULL;
+
+               execvp("/bin/sh", (char **)__argv);
+               free(__argv);
+               exit(-1);
+       }
+
+       if (rec_script_path)
+               script_path = rec_script_path;
+       if (rep_script_path)
+               script_path = rep_script_path;
+
+       if (script_path) {
+               system_wide = false;
+               j = 0;
+
+               if (rec_script_path)
+                       system_wide = !have_cmd(argc - 1, &argv[1]);
+
+               __argv = malloc((argc + 2) * sizeof(const char *));
+               if (!__argv)
+                       die("malloc");
+               __argv[j++] = "/bin/sh";
+               __argv[j++] = script_path;
+               if (system_wide)
+                       __argv[j++] = "-a";
+               for (i = 2; i < argc; i++)
+                       __argv[j++] = argv[i];
+               __argv[j++] = NULL;
+
+               execvp("/bin/sh", (char **)__argv);
+               free(__argv);
+               exit(-1);
+       }
+
+       if (symbol__init() < 0)
+               return -1;
+       if (!script_name)
+               setup_pager();
+
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
+       if (session == NULL)
+               return -ENOMEM;
+
+       if (strcmp(input_name, "-") &&
+           !perf_session__has_traces(session, "record -R"))
+               return -EINVAL;
+
+       if (generate_script_lang) {
+               struct stat perf_stat;
+
+               int input = open(input_name, O_RDONLY);
+               if (input < 0) {
+                       perror("failed to open file");
+                       exit(-1);
+               }
+
+               err = fstat(input, &perf_stat);
+               if (err < 0) {
+                       perror("failed to stat file");
+                       exit(-1);
+               }
+
+               if (!perf_stat.st_size) {
+                       fprintf(stderr, "zero-sized file, nothing to do!\n");
+                       exit(0);
+               }
+
+               scripting_ops = script_spec__lookup(generate_script_lang);
+               if (!scripting_ops) {
+                       fprintf(stderr, "invalid language specifier");
+                       return -1;
+               }
+
+               err = scripting_ops->generate_script("perf-script");
+               goto out;
+       }
+
+       if (script_name) {
+               err = scripting_ops->start_script(script_name, argc, argv);
+               if (err)
+                       goto out;
+               pr_debug("perf script started with script %s\n\n", script_name);
+       }
+
+       err = __cmd_script(session);
+
+       perf_session__delete(session);
+       cleanup_scripting();
+out:
+       return err;
+}
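
Language backends plug into the table above through script_spec_register();
a sketch of what a backend's setup looks like under this scheme (the
"MyLang" name and functions are hypothetical; the real Perl and Python
registrations live under util/scripting-engines/):

    static struct scripting_ops mylang_scripting_ops = {
            .name            = "MyLang",
            .start_script    = mylang_start_script,
            .stop_script     = mylang_stop_script,
            .process_event   = mylang_process_event,
            .generate_script = mylang_generate_script,
    };

    void setup_mylang_scripting(void)
    {
            /* register both the language name and the file extension, so
             * "perf script -s lang:foo.ml" and plain "foo.ml" resolve */
            if (script_spec_register("MyLang", &mylang_scripting_ops))
                    pr_err("Can't register MyLang script extension\n");
            if (script_spec_register("ml", &mylang_scripting_ops))
                    pr_err("Can't register .ml script extension\n");
    }
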
index a6b4d44f950246e27d4cb6b0bc3e6d5afd27adcc..02b2d8013a61e537bf80e5d110d6c2f2c3f6b150 100644 (file)
@@ -43,6 +43,7 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/event.h"
+#include "util/evsel.h"
 #include "util/debug.h"
 #include "util/header.h"
 #include "util/cpumap.h"
@@ -52,6 +53,8 @@
 #include <math.h>
 #include <locale.h>
 
+#define DEFAULT_SEPARATOR      " "
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -69,25 +72,23 @@ static struct perf_event_attr default_attrs[] = {
 };
 
 static bool                    system_wide                     =  false;
-static int                     nr_cpus                         =  0;
+static struct cpu_map          *cpus;
 static int                     run_idx                         =  0;
 
 static int                     run_count                       =  1;
 static bool                    no_inherit                      = false;
 static bool                    scale                           =  true;
+static bool                    no_aggr                         = false;
 static pid_t                   target_pid                      = -1;
 static pid_t                   target_tid                      = -1;
-static pid_t                   *all_tids                       =  NULL;
-static int                     thread_num                      =  0;
+static struct thread_map       *threads;
 static pid_t                   child_pid                       = -1;
 static bool                    null_run                        =  false;
-static bool                    big_num                         =  false;
+static bool                    big_num                         =  true;
+static int                     big_num_opt                     =  -1;
 static const char              *cpu_list;
-
-
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int                     event_scaled[MAX_COUNTERS];
+static const char              *csv_sep                        = NULL;
+static bool                    csv_output                      = false;
 
 static volatile int done = 0;
 
@@ -96,6 +97,22 @@ struct stats
        double n, mean, M2;
 };
 
+struct perf_stat {
+       struct stats      res_stats[3];
+};
+
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+{
+       evsel->priv = zalloc(sizeof(struct perf_stat));
+       return evsel->priv == NULL ? -ENOMEM : 0;
+}
+
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+{
+       free(evsel->priv);
+       evsel->priv = NULL;
+}
+
 static void update_stats(struct stats *stats, u64 val)
 {
        double delta;
@@ -135,69 +152,38 @@ static double stddev_stats(struct stats *stats)
        return sqrt(variance_mean);
 }
 
-struct stats                   event_res_stats[MAX_COUNTERS][3];
-struct stats                   runtime_nsecs_stats;
+struct stats                   runtime_nsecs_stats[MAX_NR_CPUS];
+struct stats                   runtime_cycles_stats[MAX_NR_CPUS];
+struct stats                   runtime_branches_stats[MAX_NR_CPUS];
 struct stats                   walltime_nsecs_stats;
-struct stats                   runtime_cycles_stats;
-struct stats                   runtime_branches_stats;
 
-#define MATCH_EVENT(t, c, counter)                     \
-       (attrs[counter].type == PERF_TYPE_##t &&        \
-        attrs[counter].config == PERF_COUNT_##c)
-
-#define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
-
-static int create_perf_stat_counter(int counter)
+static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
-       struct perf_event_attr *attr = attrs + counter;
-       int thread;
-       int ncreated = 0;
+       struct perf_event_attr *attr = &evsel->attr;
 
        if (scale)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
-       if (system_wide) {
-               int cpu;
-
-               for (cpu = 0; cpu < nr_cpus; cpu++) {
-                       fd[cpu][counter][0] = sys_perf_event_open(attr,
-                                       -1, cpumap[cpu], -1, 0);
-                       if (fd[cpu][counter][0] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
-                                        fd[cpu][counter][0], strerror(errno));
-                       else
-                               ++ncreated;
-               }
-       } else {
-               attr->inherit = !no_inherit;
-               if (target_pid == -1 && target_tid == -1) {
-                       attr->disabled = 1;
-                       attr->enable_on_exec = 1;
-               }
-               for (thread = 0; thread < thread_num; thread++) {
-                       fd[0][counter][thread] = sys_perf_event_open(attr,
-                               all_tids[thread], -1, -1, 0);
-                       if (fd[0][counter][thread] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
-                                        fd[0][counter][thread],
-                                        strerror(errno));
-                       else
-                               ++ncreated;
-               }
+       if (system_wide)
+               return perf_evsel__open_per_cpu(evsel, cpus);
+
+       attr->inherit = !no_inherit;
+       if (target_pid == -1 && target_tid == -1) {
+               attr->disabled = 1;
+               attr->enable_on_exec = 1;
        }
 
-       return ncreated;
+       return perf_evsel__open_per_thread(evsel, threads);
 }
 
 /*
  * Does the counter have nsecs as a unit?
  */
-static inline int nsec_counter(int counter)
+static inline int nsec_counter(struct perf_evsel *evsel)
 {
-       if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
-           MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+       if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+           perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                return 1;
 
        return 0;
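
perf_evsel__match() takes over from the deleted MATCH_EVENT() macro; a
definition consistent with its use here would be the following (the real
macro lives in util/evsel.h, introduced earlier in this series):

    #define perf_evsel__match(evsel, t, c)                  \
            ((evsel)->attr.type == PERF_TYPE_##t &&         \
             (evsel)->attr.config == PERF_COUNT_##c)
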
@@ -205,55 +191,19 @@ static inline int nsec_counter(int counter)
 
 /*
  * Read out the results of a single counter:
+ * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(int counter)
+static int read_counter_aggr(struct perf_evsel *counter)
 {
-       u64 count[3], single_count[3];
-       int cpu;
-       size_t res, nv;
-       int scaled;
-       int i, thread;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
-       for (cpu = 0; cpu < nr_cpus; cpu++) {
-               for (thread = 0; thread < thread_num; thread++) {
-                       if (fd[cpu][counter][thread] < 0)
-                               continue;
-
-                       res = read(fd[cpu][counter][thread],
-                                       single_count, nv * sizeof(u64));
-                       assert(res == nv * sizeof(u64));
-
-                       close(fd[cpu][counter][thread]);
-                       fd[cpu][counter][thread] = -1;
-
-                       count[0] += single_count[0];
-                       if (scale) {
-                               count[1] += single_count[1];
-                               count[2] += single_count[2];
-                       }
-               }
-       }
-
-       scaled = 0;
-       if (scale) {
-               if (count[2] == 0) {
-                       event_scaled[counter] = -1;
-                       count[0] = 0;
-                       return;
-               }
+       struct perf_stat *ps = counter->priv;
+       u64 *count = counter->counts->aggr.values;
+       int i;
 
-               if (count[2] < count[1]) {
-                       event_scaled[counter] = 1;
-                       count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-               }
-       }
+       if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
+               return -1;
 
        for (i = 0; i < 3; i++)
-               update_stats(&event_res_stats[counter][i], count[i]);
+               update_stats(&ps->res_stats[i], count[i]);
 
        if (verbose) {
                fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
@@ -263,26 +213,51 @@ static void read_counter(int counter)
        /*
         * Save the full runtime - to allow normalization during printout:
         */
-       if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
-               update_stats(&runtime_nsecs_stats, count[0]);
-       if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
-               update_stats(&runtime_cycles_stats, count[0]);
-       if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
-               update_stats(&runtime_branches_stats, count[0]);
+       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+               update_stats(&runtime_nsecs_stats[0], count[0]);
+       if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+               update_stats(&runtime_cycles_stats[0], count[0]);
+       if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+               update_stats(&runtime_branches_stats[0], count[0]);
+
+       return 0;
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static int read_counter(struct perf_evsel *counter)
+{
+       u64 *count;
+       int cpu;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+                       return -1;
+
+               count = counter->counts->cpu[cpu].values;
+
+               if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+                       update_stats(&runtime_nsecs_stats[cpu], count[0]);
+               if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+                       update_stats(&runtime_cycles_stats[cpu], count[0]);
+               if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+                       update_stats(&runtime_branches_stats[cpu], count[0]);
+       }
+
+       return 0;
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
 {
        unsigned long long t0, t1;
+       struct perf_evsel *counter;
        int status = 0;
-       int counter, ncreated = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = (argc > 0);
        char buf;
 
-       if (!system_wide)
-               nr_cpus = 1;
-
        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(1);
@@ -322,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv)
                }
 
                if (target_tid == -1 && target_pid == -1 && !system_wide)
-                       all_tids[0] = child_pid;
+                       threads->map[0] = child_pid;
 
                /*
                 * Wait for the child to be ready to exec.
@@ -334,16 +309,23 @@ static int run_perf_stat(int argc __used, const char **argv)
                close(child_ready_pipe[0]);
        }
 
-       for (counter = 0; counter < nr_counters; counter++)
-               ncreated += create_perf_stat_counter(counter);
-
-       if (ncreated == 0) {
-               pr_err("No permission to collect %sstats.\n"
-                      "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
-                      system_wide ? "system-wide " : "");
-               if (child_pid != -1)
-                       kill(child_pid, SIGTERM);
-               return -1;
+       list_for_each_entry(counter, &evsel_list, node) {
+               if (create_perf_stat_counter(counter) < 0) {
+                       if (errno == EPERM || errno == EACCES) {
+                               error("You may not have permission to collect %sstats.\n"
+                                     "\t Consider tweaking"
+                                     " /proc/sys/kernel/perf_event_paranoid or running as root.",
+                                     system_wide ? "system-wide " : "");
+                       } else {
+                               error("open_counter returned with %d (%s). "
+                                     "/bin/dmesg may provide additional information.\n",
+                                      errno, strerror(errno));
+                       }
+                       if (child_pid != -1)
+                               kill(child_pid, SIGTERM);
+                       die("Not all events could be opened.\n");
+                       return -1;
+               }
        }
 
        /*
@@ -362,60 +344,97 @@ static int run_perf_stat(int argc __used, const char **argv)
 
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
-       for (counter = 0; counter < nr_counters; counter++)
-               read_counter(counter);
+       if (no_aggr) {
+               list_for_each_entry(counter, &evsel_list, node) {
+                       read_counter(counter);
+                       perf_evsel__close_fd(counter, cpus->nr, 1);
+               }
+       } else {
+               list_for_each_entry(counter, &evsel_list, node) {
+                       read_counter_aggr(counter);
+                       perf_evsel__close_fd(counter, cpus->nr, threads->nr);
+               }
+       }
 
        return WEXITSTATUS(status);
 }
 
-static void print_noise(int counter, double avg)
+static void print_noise(struct perf_evsel *evsel, double avg)
 {
+       struct perf_stat *ps;
+
        if (run_count == 1)
                return;
 
+       ps = evsel->priv;
        fprintf(stderr, "   ( +- %7.3f%% )",
-                       100 * stddev_stats(&event_res_stats[counter][0]) / avg);
+                       100 * stddev_stats(&ps->res_stats[0]) / avg);
 }
 
-static void nsec_printout(int counter, double avg)
+static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
        double msecs = avg / 1e6;
+       char cpustr[16] = { '\0', };
+       const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
 
-       fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpus->map[cpu], csv_sep);
+
+       fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
+
+       if (csv_output)
+               return;
 
-       if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
+       if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                fprintf(stderr, " # %10.3f CPUs ",
                                avg / avg_stats(&walltime_nsecs_stats));
-       }
 }
 
-static void abs_printout(int counter, double avg)
+static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
        double total, ratio = 0.0;
+       char cpustr[16] = { '\0', };
+       const char *fmt;
+
+       if (csv_output)
+               fmt = "%s%.0f%s%s";
+       else if (big_num)
+               fmt = "%s%'18.0f%s%-24s";
+       else
+               fmt = "%s%18.0f%s%-24s";
 
-       if (big_num)
-               fprintf(stderr, " %'18.0f  %-24s", avg, event_name(counter));
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpus->map[cpu], csv_sep);
        else
-               fprintf(stderr, " %18.0f  %-24s", avg, event_name(counter));
+               cpu = 0;
+
+       fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
 
-       if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
-               total = avg_stats(&runtime_cycles_stats);
+       if (csv_output)
+               return;
+
+       if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+               total = avg_stats(&runtime_cycles_stats[cpu]);
 
                if (total)
                        ratio = avg / total;
 
                fprintf(stderr, " # %10.3f IPC  ", ratio);
-       } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
-                       runtime_branches_stats.n != 0) {
-               total = avg_stats(&runtime_branches_stats);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+                       runtime_branches_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_branches_stats[cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
                fprintf(stderr, " # %10.3f %%    ", ratio);
 
-       } else if (runtime_nsecs_stats.n != 0) {
-               total = avg_stats(&runtime_nsecs_stats);
+       } else if (runtime_nsecs_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total)
                        ratio = 1000.0 * avg / total;
@@ -426,30 +445,38 @@ static void abs_printout(int counter, double avg)
 
 /*
  * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
  */
-static void print_counter(int counter)
+static void print_counter_aggr(struct perf_evsel *counter)
 {
-       double avg = avg_stats(&event_res_stats[counter][0]);
-       int scaled = event_scaled[counter];
+       struct perf_stat *ps = counter->priv;
+       double avg = avg_stats(&ps->res_stats[0]);
+       int scaled = counter->counts->scaled;
 
        if (scaled == -1) {
-               fprintf(stderr, " %18s  %-24s\n",
-                       "<not counted>", event_name(counter));
+               fprintf(stderr, "%*s%s%-24s\n",
+                       csv_output ? 0 : 18,
+                       "<not counted>", csv_sep, event_name(counter));
                return;
        }
 
        if (nsec_counter(counter))
-               nsec_printout(counter, avg);
+               nsec_printout(-1, counter, avg);
        else
-               abs_printout(counter, avg);
+               abs_printout(-1, counter, avg);
+
+       if (csv_output) {
+               fputc('\n', stderr);
+               return;
+       }
 
        print_noise(counter, avg);
 
        if (scaled) {
                double avg_enabled, avg_running;
 
-               avg_enabled = avg_stats(&event_res_stats[counter][1]);
-               avg_running = avg_stats(&event_res_stats[counter][2]);
+               avg_enabled = avg_stats(&ps->res_stats[1]);
+               avg_running = avg_stats(&ps->res_stats[2]);
 
                fprintf(stderr, "  (scaled from %.2f%%)",
                                100 * avg_running / avg_enabled);
@@ -458,40 +485,92 @@ static void print_counter(int counter)
        fprintf(stderr, "\n");
 }
 
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(struct perf_evsel *counter)
+{
+       u64 ena, run, val;
+       int cpu;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               val = counter->counts->cpu[cpu].val;
+               ena = counter->counts->cpu[cpu].ena;
+               run = counter->counts->cpu[cpu].run;
+               if (run == 0 || ena == 0) {
+                       fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+                               csv_output ? 0 : -4,
+                               cpus->map[cpu], csv_sep,
+                               csv_output ? 0 : 18,
+                               "<not counted>", csv_sep,
+                               event_name(counter));
+
+                       fprintf(stderr, "\n");
+                       continue;
+               }
+
+               if (nsec_counter(counter))
+                       nsec_printout(cpu, counter, val);
+               else
+                       abs_printout(cpu, counter, val);
+
+               if (!csv_output) {
+                       print_noise(counter, 1.0);
+
+                       if (run != ena) {
+                               fprintf(stderr, "  (scaled from %.2f%%)",
+                                       100.0 * run / ena);
+                       }
+               }
+               fprintf(stderr, "\n");
+       }
+}
+
 static void print_stat(int argc, const char **argv)
 {
-       int i, counter;
+       struct perf_evsel *counter;
+       int i;
 
        fflush(stdout);
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " Performance counter stats for ");
-       if(target_pid == -1 && target_tid == -1) {
-               fprintf(stderr, "\'%s", argv[0]);
-               for (i = 1; i < argc; i++)
-                       fprintf(stderr, " %s", argv[i]);
-       } else if (target_pid != -1)
-               fprintf(stderr, "process id \'%d", target_pid);
-       else
-               fprintf(stderr, "thread id \'%d", target_tid);
-
-       fprintf(stderr, "\'");
-       if (run_count > 1)
-               fprintf(stderr, " (%d runs)", run_count);
-       fprintf(stderr, ":\n\n");
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " Performance counter stats for ");
+               if (target_pid == -1 && target_tid == -1) {
+                       fprintf(stderr, "\'%s", argv[0]);
+                       for (i = 1; i < argc; i++)
+                               fprintf(stderr, " %s", argv[i]);
+               } else if (target_pid != -1)
+                       fprintf(stderr, "process id \'%d", target_pid);
+               else
+                       fprintf(stderr, "thread id \'%d", target_tid);
+
+               fprintf(stderr, "\'");
+               if (run_count > 1)
+                       fprintf(stderr, " (%d runs)", run_count);
+               fprintf(stderr, ":\n\n");
+       }
 
-       for (counter = 0; counter < nr_counters; counter++)
-               print_counter(counter);
+       if (no_aggr) {
+               list_for_each_entry(counter, &evsel_list, node)
+                       print_counter(counter);
+       } else {
+               list_for_each_entry(counter, &evsel_list, node)
+                       print_counter_aggr(counter);
+       }
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " %18.9f  seconds time elapsed",
-                       avg_stats(&walltime_nsecs_stats)/1e9);
-       if (run_count > 1) {
-               fprintf(stderr, "   ( +- %7.3f%% )",
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " %18.9f  seconds time elapsed",
+                               avg_stats(&walltime_nsecs_stats)/1e9);
+               if (run_count > 1) {
+                       fprintf(stderr, "   ( +- %7.3f%% )",
                                100*stddev_stats(&walltime_nsecs_stats) /
                                avg_stats(&walltime_nsecs_stats));
+               }
+               fprintf(stderr, "\n\n");
        }
-       fprintf(stderr, "\n\n");
 }
 
 static volatile int signr = -1;
@@ -521,6 +600,13 @@ static const char * const stat_usage[] = {
        NULL
 };
 
+static int stat__set_big_num(const struct option *opt __used,
+                            const char *s __used, int unset)
+{
+       big_num_opt = unset ? 0 : 1;
+       return 0;
+}
+
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
@@ -541,64 +627,96 @@ static const struct option options[] = {
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
-       OPT_BOOLEAN('B', "big-num", &big_num,
-                   "print large numbers with thousands\' separators"),
+       OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
+                          "print large numbers with thousands\' separators",
+                          stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
+       OPT_BOOLEAN('A', "no-aggr", &no_aggr,
+                   "disable CPU count aggregation"),
+       OPT_STRING('x', "field-separator", &csv_sep, "separator",
+                  "print counts with custom separator"),
        OPT_END()
 };
 
 int cmd_stat(int argc, const char **argv, const char *prefix __used)
 {
-       int status;
-       int i,j;
+       struct perf_evsel *pos;
+       int status = -ENOMEM;
 
        setlocale(LC_ALL, "");
 
        argc = parse_options(argc, argv, options, stat_usage,
                PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (csv_sep)
+               csv_output = true;
+       else
+               csv_sep = DEFAULT_SEPARATOR;
+
+       /*
+        * let the spreadsheet do the pretty-printing
+        */
+       if (csv_output) {
+               /* User explicitly passed -B? */
+               if (big_num_opt == 1) {
+                       fprintf(stderr, "-B option not supported with -x\n");
+                       usage_with_options(stat_usage, options);
+               } else /* Nope, so disable big number formatting */
+                       big_num = false;
+       } else if (big_num_opt == 0) /* User passed --no-big-num */
+               big_num = false;
+
        if (!argc && target_pid == -1 && target_tid == -1)
                usage_with_options(stat_usage, options);
        if (run_count <= 0)
                usage_with_options(stat_usage, options);
 
+       /* no_aggr is for system-wide only */
+       if (no_aggr && !system_wide)
+               usage_with_options(stat_usage, options);
+
        /* Set attrs and nr_counters if no event is selected and !null_run */
        if (!null_run && !nr_counters) {
-               memcpy(attrs, default_attrs, sizeof(default_attrs));
+               size_t c;
+
                nr_counters = ARRAY_SIZE(default_attrs);
+
+               for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
+                       pos = perf_evsel__new(default_attrs[c].type,
+                                             default_attrs[c].config,
+                                             nr_counters);
+                       if (pos == NULL)
+                               goto out;
+                       list_add(&pos->node, &evsel_list);
+               }
        }
 
-       if (system_wide)
-               nr_cpus = read_cpu_map(cpu_list);
-       else
-               nr_cpus = 1;
+       if (target_pid != -1)
+               target_tid = target_pid;
 
-       if (nr_cpus < 1)
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problems finding threads of monitor\n");
                usage_with_options(stat_usage, options);
+       }
 
-       if (target_pid != -1) {
-               target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                                       target_pid);
-                       usage_with_options(stat_usage, options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       return -ENOMEM;
+       if (system_wide)
+               cpus = cpu_map__new(cpu_list);
+       else
+               cpus = cpu_map__dummy_new();
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       if (cpus == NULL) {
+               perror("failed to parse CPUs map");
+               usage_with_options(stat_usage, options);
+               return -1;
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       if (!fd[i][j])
-                               return -ENOMEM;
-               }
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+                   perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
+                   perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
        }
 
        /*
@@ -621,6 +739,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 
        if (status != -1)
                print_stat(argc, argv);
-
+out_free_fd:
+       list_for_each_entry(pos, &evsel_list, node)
+               perf_evsel__free_stat_priv(pos);
+out:
+       thread_map__delete(threads);
+       threads = NULL;
        return status;
 }
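
Taken together, the stat rework moves counters onto the perf_evsel life
cycle. A condensed sketch using only the helpers referenced in this diff
(allocation-failure checks elided):

    struct perf_evsel *evsel;

    evsel = perf_evsel__new(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, 0);

    perf_evsel__alloc_stat_priv(evsel);                 /* per-tool ->priv */
    perf_evsel__alloc_counts(evsel, cpus->nr);          /* per-cpu results */
    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr); /* one fd per slot */

    perf_evsel__open_per_thread(evsel, threads);        /* or _per_cpu()   */

    /* aggregated read: sums val/ena/run across all fds into counts->aggr */
    __perf_evsel__read(evsel, cpus->nr, threads->nr, scale);

    perf_evsel__close_fd(evsel, cpus->nr, threads->nr);
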
index 035b9fa063a9453002873c00f654adcd13a99eb2..1c984342a5795090d2e863d0b2da1a995e0201d2 100644 (file)
@@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void)
         * end addresses too.
         */
        for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
-               struct symbol *pair;
+               struct symbol *pair, *first_pair;
+               bool backwards = true;
 
                sym  = rb_entry(nd, struct symbol, rb_node);
-               pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+
+               if (sym->start == sym->end)
+                       continue;
+
+               first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+               pair = first_pair;
 
                if (pair && pair->start == sym->start) {
 next_pair:
@@ -143,8 +149,10 @@ next_pair:
                                pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
                                         sym->start, sym->name, sym->end, pair->end);
                        } else {
-                               struct rb_node *nnd = rb_prev(&pair->rb_node);
-
+                               struct rb_node *nnd;
+detour:
+                               nnd = backwards ? rb_prev(&pair->rb_node) :
+                                                 rb_next(&pair->rb_node);
                                if (nnd) {
                                        struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
@@ -153,6 +161,13 @@ next_pair:
                                                goto next_pair;
                                        }
                                }
+
+                               if (backwards) {
+                                       backwards = false;
+                                       pair = first_pair;
+                                       goto detour;
+                               }
+
                                pr_debug("%#Lx: diff name v: %s k: %s\n",
                                         sym->start, sym->name, pair->name);
                        }
@@ -219,6 +234,89 @@ out:
        return err;
 }
 
+#include "util/evsel.h"
+#include <sys/types.h>
+
+static int trace_event__id(const char *event_name)
+{
+       char *filename;
+       int err = -1, fd;
+
+       if (asprintf(&filename,
+                    "/sys/kernel/debug/tracing/events/syscalls/%s/id",
+                    event_name) < 0)
+               return -1;
+
+       fd = open(filename, O_RDONLY);
+       if (fd >= 0) {
+               char id[16];
+               if (read(fd, id, sizeof(id)) > 0)
+                       err = atoi(id);
+               close(fd);
+       }
+
+       free(filename);
+       return err;
+}
+
+static int test__open_syscall_event(void)
+{
+       int err = -1, fd;
+       struct thread_map *threads;
+       struct perf_evsel *evsel;
+       unsigned int nr_open_calls = 111, i;
+       int id = trace_event__id("sys_enter_open");
+
+       if (id < 0) {
+               pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+               return -1;
+       }
+
+       threads = thread_map__new(-1, getpid());
+       if (threads == NULL) {
+               pr_debug("thread_map__new\n");
+               return -1;
+       }
+
+       evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+       if (evsel == NULL) {
+               pr_debug("perf_evsel__new\n");
+               goto out_thread_map_delete;
+       }
+
+       if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+               pr_debug("failed to open counter: %s, "
+                        "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+                        strerror(errno));
+               goto out_evsel_delete;
+       }
+
+       for (i = 0; i < nr_open_calls; ++i) {
+               fd = open("/etc/passwd", O_RDONLY);
+               close(fd);
+       }
+
+       if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
+               pr_debug("perf_evsel__read_on_cpu\n");
+               goto out_close_fd;
+       }
+
+       if (evsel->counts->cpu[0].val != nr_open_calls) {
+               pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %Ld\n",
+                        nr_open_calls, evsel->counts->cpu[0].val);
+               goto out_close_fd;
+       }
+
+       err = 0;
+out_close_fd:
+       perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+       perf_evsel__delete(evsel);
+out_thread_map_delete:
+       thread_map__delete(threads);
+       return err;
+}
+
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -227,6 +325,10 @@ static struct test {
                .desc = "vmlinux symtab matches kallsyms",
                .func = test__vmlinux_matches_kallsyms,
        },
+       {
+               .desc = "detect open syscall event",
+               .func = test__open_syscall_event,
+       },
        {
                .func = NULL,
        },
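
The new self-test resolves a tracepoint name to its numeric id straight from debugfs before opening a counter on it. A standalone sketch of that lookup, runnable outside the perf tree; like the test itself it assumes debugfs is mounted on /sys/kernel/debug:

        #include <fcntl.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <unistd.h>

        static int tracepoint_id(const char *group, const char *name)
        {
                char path[256], id[16];
                int fd, err = -1;
                ssize_t n;

                snprintf(path, sizeof(path),
                         "/sys/kernel/debug/tracing/events/%s/%s/id", group, name);
                fd = open(path, O_RDONLY);          /* one id file per event */
                if (fd < 0)
                        return -1;
                n = read(fd, id, sizeof(id) - 1);   /* small decimal string  */
                if (n > 0) {
                        id[n] = '\0';
                        err = atoi(id);
                }
                close(fd);
                return err;
        }

        int main(void)
        {
                printf("sys_enter_open: %d\n",
                       tracepoint_id("syscalls", "sys_enter_open"));
                return 0;
        }

The id then becomes the config of a PERF_TYPE_TRACEPOINT attribute via perf_evsel__new(), as the test shows.
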
index 9bcc38f0b706f91ca3701e440c15e3e9c8aa7bd3..746cf03cb05d86a2796c88fca27930ca6e0a8894 100644 (file)
 #include "util/session.h"
 #include "util/svghelper.h"
 
+#define SUPPORT_OLD_POWER_EVENTS 1
+#define PWR_EVENT_EXIT -1
+
 static char            const *input_name = "perf.data";
 static char            const *output_name = "output.svg";
 
@@ -272,19 +276,22 @@ static int cpus_cstate_state[MAX_CPUS];
 static u64 cpus_pstate_start_times[MAX_CPUS];
 static u64 cpus_pstate_state[MAX_CPUS];
 
-static int process_comm_event(event_t *event, struct perf_session *session __used)
+static int process_comm_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_set_comm(event->comm.tid, event->comm.comm);
        return 0;
 }
 
-static int process_fork_event(event_t *event, struct perf_session *session __used)
+static int process_fork_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
        return 0;
 }
 
-static int process_exit_event(event_t *event, struct perf_session *session __used)
+static int process_exit_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_exit(event->fork.pid, event->fork.time);
        return 0;
@@ -298,12 +305,21 @@ struct trace_entry {
        int                     lock_depth;
 };
 
-struct power_entry {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static int use_old_power_events;
+struct power_entry_old {
        struct trace_entry te;
        u64     type;
        u64     value;
        u64     cpu_id;
 };
+#endif
+
+struct power_processor_entry {
+       struct trace_entry te;
+       u32     state;
+       u32     cpu_id;
+};
 
 #define TASK_COMM_LEN 16
 struct wakeup_entry {
@@ -470,48 +486,65 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
 }
 
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event __used,
+                               struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct trace_entry *te;
 
-       memset(&data, 0, sizeof(data));
-
-       event__parse_sample(event, session->sample_type, &data);
-
        if (session->sample_type & PERF_SAMPLE_TIME) {
-               if (!first_time || first_time > data.time)
-                       first_time = data.time;
-               if (last_time < data.time)
-                       last_time = data.time;
+               if (!first_time || first_time > sample->time)
+                       first_time = sample->time;
+               if (last_time < sample->time)
+                       last_time = sample->time;
        }
 
-       te = (void *)data.raw_data;
-       if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
+       te = (void *)sample->raw_data;
+       if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
                char *event_str;
-               struct power_entry *pe;
-
-               pe = (void *)te;
-
+#ifdef SUPPORT_OLD_POWER_EVENTS
+               struct power_entry_old *peo;
+               peo = (void *)te;
+#endif
                event_str = perf_header__find_event(te->type);
 
                if (!event_str)
                        return 0;
 
-               if (strcmp(event_str, "power:power_start") == 0)
-                       c_state_start(pe->cpu_id, data.time, pe->value);
+               if (strcmp(event_str, "power:cpu_idle") == 0) {
+                       struct power_processor_entry *ppe = (void *)te;
+                       if (ppe->state == (u32)PWR_EVENT_EXIT)
+                               c_state_end(ppe->cpu_id, sample->time);
+                       else
+                               c_state_start(ppe->cpu_id, sample->time,
+                                             ppe->state);
+               } else if (strcmp(event_str, "power:cpu_frequency") == 0) {
+                       struct power_processor_entry *ppe = (void *)te;
+                       p_state_change(ppe->cpu_id, sample->time, ppe->state);
+               } else if (strcmp(event_str, "sched:sched_wakeup") == 0)
+                       sched_wakeup(sample->cpu, sample->time, sample->pid, te);
 
-               if (strcmp(event_str, "power:power_end") == 0)
-                       c_state_end(pe->cpu_id, data.time);
+               else if (strcmp(event_str, "sched:sched_switch") == 0)
+                       sched_switch(sample->cpu, sample->time, te);
 
-               if (strcmp(event_str, "power:power_frequency") == 0)
-                       p_state_change(pe->cpu_id, data.time, pe->value);
+#ifdef SUPPORT_OLD_POWER_EVENTS
+               if (use_old_power_events) {
+                       if (strcmp(event_str, "power:power_start") == 0)
+                               c_state_start(peo->cpu_id, sample->time,
+                                             peo->value);
 
-               if (strcmp(event_str, "sched:sched_wakeup") == 0)
-                       sched_wakeup(data.cpu, data.time, data.pid, te);
+                       else if (strcmp(event_str, "power:power_end") == 0)
+                               c_state_end(sample->cpu, sample->time);
 
-               if (strcmp(event_str, "sched:sched_switch") == 0)
-                       sched_switch(data.cpu, data.time, te);
+                       else if (strcmp(event_str,
+                                       "power:power_frequency") == 0)
+                               p_state_change(peo->cpu_id, sample->time,
+                                              peo->value);
+               }
+#endif
        }
        return 0;
 }
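
The new power:cpu_idle tracepoint folds the old power_start/power_end pair into a single event: a state payload of (u32)-1 means the CPU is leaving idle, anything else is the C-state being entered. A tiny standalone demo of that decoding convention; handle_cpu_idle is a hypothetical stand-in for the c_state_start/c_state_end dispatch above:

        #include <stdint.h>
        #include <stdio.h>

        #define PWR_EVENT_EXIT -1

        static void handle_cpu_idle(uint32_t cpu, uint32_t state, uint64_t t)
        {
                if (state == (uint32_t)PWR_EVENT_EXIT)  /* 0xffffffff marks exit */
                        printf("cpu%u: idle exit at %llu\n",
                               cpu, (unsigned long long)t);
                else
                        printf("cpu%u: enter C%u at %llu\n",
                               cpu, state, (unsigned long long)t);
        }

        int main(void)
        {
                handle_cpu_idle(0, 2, 1000);                        /* c_state_start */
                handle_cpu_idle(0, (uint32_t)PWR_EVENT_EXIT, 2500); /* c_state_end   */
                return 0;
        }
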
@@ -937,7 +970,8 @@ static struct perf_event_ops event_ops = {
 
 static int __cmd_timechart(void)
 {
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        int ret = -EINVAL;
 
        if (session == NULL)
@@ -968,7 +1002,8 @@ static const char * const timechart_usage[] = {
        NULL
 };
 
-static const char *record_args[] = {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static const char * const record_old_args[] = {
        "record",
        "-a",
        "-R",
@@ -980,16 +1015,43 @@ static const char *record_args[] = {
        "-e", "sched:sched_wakeup",
        "-e", "sched:sched_switch",
 };
+#endif
+
+static const char * const record_new_args[] = {
+       "record",
+       "-a",
+       "-R",
+       "-f",
+       "-c", "1",
+       "-e", "power:cpu_frequency",
+       "-e", "power:cpu_idle",
+       "-e", "sched:sched_wakeup",
+       "-e", "sched:sched_switch",
+};
 
 static int __cmd_record(int argc, const char **argv)
 {
        unsigned int rec_argc, i, j;
        const char **rec_argv;
+       const char * const *record_args = record_new_args;
+       unsigned int record_elems = ARRAY_SIZE(record_new_args);
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+       if (!is_valid_tracepoint("power:cpu_idle") &&
+           is_valid_tracepoint("power:power_start")) {
+               use_old_power_events = 1;
+               record_args = record_old_args;
+               record_elems = ARRAY_SIZE(record_old_args);
+       }
+#endif
 
-       rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+       rec_argc = record_elems + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
-       for (i = 0; i < ARRAY_SIZE(record_args); i++)
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < record_elems; i++)
                rec_argv[i] = strdup(record_args[i]);
 
        for (j = 1; j < (unsigned int)argc; j++, i++)
@@ -1018,6 +1080,8 @@ static const struct option options[] = {
        OPT_CALLBACK('p', "process", NULL, "process",
                      "process selector. Pass a pid or process name.",
                       parse_process),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
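
On the record side the tool now probes the running kernel with is_valid_tracepoint() and picks whichever tracepoint set exists, so the same invocation works across kernel generations:

        perf timechart record sleep 5
        #  new kernels: -e power:cpu_frequency -e power:cpu_idle ...
        #  old kernels: -e power:power_start -e power:power_end -e power:power_frequency ...
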
index dd625808c2a5332c4f733a59acfb1ee881faae3f..1e67ab9c7ebc46c5df87e6219ad023b5509207f2 100644 (file)
@@ -21,6 +21,7 @@
 #include "perf.h"
 
 #include "util/color.h"
+#include "util/evsel.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
@@ -29,6 +30,7 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/cpumap.h"
+#include "util/xyarray.h"
 
 #include "util/debug.h"
 
@@ -55,7 +57,7 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
 static bool                    system_wide                     =  false;
 
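
The FD() macro hides a two-dimensional (cpu, thread) table of file descriptors hung off each evsel. The backing store is the new util/xyarray; a conceptual sketch of its layout, with field names assumed from how xyarray__new()/xyarray__entry() are called in this patch:

        #include <stdlib.h>

        struct xyarray {
                size_t row_size;        /* bytes per x (cpu) row    */
                size_t entry_size;      /* bytes per (x, y) cell    */
                char contents[];        /* xlen * ylen * entry_size */
        };

        static struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
        {
                size_t row_size = ylen * entry_size;
                struct xyarray *xy = calloc(1, sizeof(*xy) + xlen * row_size);

                if (xy != NULL) {
                        xy->row_size = row_size;
                        xy->entry_size = entry_size;
                }
                return xy;
        }

        static void *xyarray__entry(struct xyarray *xy, int x, int y)
        {
                return &xy->contents[x * xy->row_size + y * xy->entry_size];
        }

With entries sized sizeof(int), FD(evsel, cpu, thread) is just a typed dereference of one cell.
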
@@ -66,10 +68,9 @@ static int                   print_entries;
 
 static int                     target_pid                      =     -1;
 static int                     target_tid                      =     -1;
-static pid_t                   *all_tids                       =      NULL;
-static int                     thread_num                      =      0;
+static struct thread_map       *threads;
 static bool                    inherit                         =  false;
-static int                     nr_cpus                         =      0;
+static struct cpu_map          *cpus;
 static int                     realtime_prio                   =      0;
 static bool                    group                           =  false;
 static unsigned int            page_size;
@@ -100,6 +101,7 @@ struct sym_entry            *sym_filter_entry               =   NULL;
 struct sym_entry               *sym_filter_entry_sched         =   NULL;
 static int                     sym_pcnt_filter                 =      5;
 static int                     sym_counter                     =      0;
+static struct perf_evsel       *sym_evsel                      =   NULL;
 static int                     display_weighted                =     -1;
 static const char              *cpu_list;
 
@@ -353,7 +355,7 @@ static void show_details(struct sym_entry *syme)
                return;
 
        symbol = sym_entry__symbol(syme);
-       printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
+       printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
        printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
 
        pthread_mutex_lock(&syme->src->lock);
@@ -460,7 +462,8 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 static void print_sym_table(void)
 {
        int printed = 0, j;
-       int counter, snap = !display_weighted ? sym_counter : 0;
+       struct perf_evsel *counter;
+       int snap = !display_weighted ? sym_counter : 0;
        float samples_per_sec = samples/delay_secs;
        float ksamples_per_sec = kernel_samples/delay_secs;
        float us_samples_per_sec = (us_samples)/delay_secs;
@@ -532,7 +535,9 @@ static void print_sym_table(void)
        }
 
        if (nr_counters == 1 || !display_weighted) {
-               printf("%Ld", (u64)attrs[0].sample_period);
+               struct perf_evsel *first;
+               first = list_entry(evsel_list.next, struct perf_evsel, node);
+               printf("%Ld", first->attr.sample_period);
                if (freq)
                        printf("Hz ");
                else
@@ -540,9 +545,9 @@ static void print_sym_table(void)
        }
 
        if (!display_weighted)
-               printf("%s", event_name(sym_counter));
-       else for (counter = 0; counter < nr_counters; counter++) {
-               if (counter)
+               printf("%s", event_name(sym_evsel));
+       else list_for_each_entry(counter, &evsel_list, node) {
+               if (counter->idx)
                        printf("/");
 
                printf("%s", event_name(counter));
@@ -558,12 +563,12 @@ static void print_sym_table(void)
                printf(" (all");
 
        if (cpu_list)
-               printf(", CPU%s: %s)\n", nr_cpus > 1 ? "s" : "", cpu_list);
+               printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
        else {
                if (target_tid != -1)
                        printf(")\n");
                else
-                       printf(", %d CPU%s)\n", nr_cpus, nr_cpus > 1 ? "s" : "");
+                       printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
        }
 
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
@@ -739,7 +744,7 @@ static void print_mapped_keys(void)
        fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);
 
        if (nr_counters > 1)
-               fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));
+               fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_evsel));
 
        fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
 
@@ -826,19 +831,23 @@ static void handle_keypress(struct perf_session *session, int c)
                        break;
                case 'E':
                        if (nr_counters > 1) {
-                               int i;
-
                                fprintf(stderr, "\nAvailable events:");
-                               for (i = 0; i < nr_counters; i++)
-                                       fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+                               list_for_each_entry(sym_evsel, &evsel_list, node)
+                                       fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
 
                                prompt_integer(&sym_counter, "Enter details event counter");
 
                                if (sym_counter >= nr_counters) {
-                                       fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+                                       sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
                                        sym_counter = 0;
+                                       fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
                                        sleep(1);
+                                       break;
                                }
+                               list_for_each_entry(sym_evsel, &evsel_list, node)
+                                       if (sym_evsel->idx == sym_counter)
+                                               break;
                        } else sym_counter = 0;
                        break;
                case 'f':
@@ -977,12 +986,13 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 }
 
 static void event__process_sample(const event_t *self,
-                                struct perf_session *session, int counter)
+                                 struct sample_data *sample,
+                                 struct perf_session *session,
+                                 struct perf_evsel *evsel)
 {
        u64 ip = self->ip.ip;
        struct sym_entry *syme;
        struct addr_location al;
-       struct sample_data data;
        struct machine *machine;
        u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
@@ -1025,7 +1035,7 @@ static void event__process_sample(const event_t *self,
        if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
                exact_samples++;
 
-       if (event__preprocess_sample(self, session, &al, &data,
+       if (event__preprocess_sample(self, session, &al, sample,
                                     symbol_filter) < 0 ||
            al.filtered)
                return;
@@ -1071,9 +1081,9 @@ static void event__process_sample(const event_t *self,
 
        syme = symbol__priv(al.sym);
        if (!syme->skip) {
-               syme->count[counter]++;
+               syme->count[evsel->idx]++;
                syme->origin = origin;
-               record_precise_ip(syme, counter, ip);
+               record_precise_ip(syme, evsel->idx, ip);
                pthread_mutex_lock(&active_symbols_lock);
                if (list_empty(&syme->node) || !syme->node.next)
                        __list_insert_active_sym(syme);
@@ -1082,12 +1092,24 @@ static void event__process_sample(const event_t *self,
 }
 
 struct mmap_data {
-       int                     counter;
        void                    *base;
        int                     mask;
        unsigned int            prev;
 };
 
+static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
+                                            int ncpus, int nthreads)
+{
+       evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
+       return evsel->priv != NULL ? 0 : -ENOMEM;
+}
+
+static void perf_evsel__free_mmap(struct perf_evsel *evsel)
+{
+       xyarray__delete(evsel->priv);
+       evsel->priv = NULL;
+}
+
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
        struct perf_event_mmap_page *pc = md->base;
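
Note the design shift here: instead of the old file-scope mmap_array[MAX_NR_CPUS][MAX_COUNTERS], each evsel now stashes its own (cpu, thread) table of struct mmap_data in the opaque evsel->priv slot, sized to the actual cpus->nr and threads->nr rather than compile-time maxima. Retrieval is then a two-liner, as the next hunk shows:

        struct xyarray *mmap_array = evsel->priv;
        struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
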
@@ -1100,11 +1122,15 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 }
 
 static void perf_session__mmap_read_counter(struct perf_session *self,
-                                           struct mmap_data *md)
+                                           struct perf_evsel *evsel,
+                                           int cpu, int thread_idx)
 {
+       struct xyarray *mmap_array = evsel->priv;
+       struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
        unsigned int head = mmap_read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
+       struct sample_data sample;
        int diff;
 
        /*
@@ -1152,10 +1178,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
                        event = &event_copy;
                }
 
+               event__parse_sample(event, self, &sample);
                if (event->header.type == PERF_RECORD_SAMPLE)
-                       event__process_sample(event, self, md->counter);
+                       event__process_sample(event, &sample, self, evsel);
                else
-                       event__process(event, self);
+                       event__process(event, &sample, self);
                old += size;
        }
 
@@ -1163,36 +1190,39 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
 }
 
 static struct pollfd *event_array;
-static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static void perf_session__mmap_read(struct perf_session *self)
 {
-       int i, counter, thread_index;
+       struct perf_evsel *counter;
+       int i, thread_index;
 
-       for (i = 0; i < nr_cpus; i++) {
-               for (counter = 0; counter < nr_counters; counter++)
+       for (i = 0; i < cpus->nr; i++) {
+               list_for_each_entry(counter, &evsel_list, node) {
                        for (thread_index = 0;
-                               thread_index < thread_num;
+                               thread_index < threads->nr;
                                thread_index++) {
                                perf_session__mmap_read_counter(self,
-                                       &mmap_array[i][counter][thread_index]);
+                                       counter, i, thread_index);
                        }
+               }
        }
 }
 
 int nr_poll;
 int group_fd;
 
-static void start_counter(int i, int counter)
+static void start_counter(int i, struct perf_evsel *evsel)
 {
+       struct xyarray *mmap_array = evsel->priv;
+       struct mmap_data *mm;
        struct perf_event_attr *attr;
        int cpu = -1;
        int thread_index;
 
        if (target_tid == -1)
-               cpu = cpumap[i];
+               cpu = cpus->map[i];
 
-       attr = attrs + counter;
+       attr = &evsel->attr;
 
        attr->sample_type       = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
@@ -1205,16 +1235,18 @@ static void start_counter(int i, int counter)
        attr->inherit           = (cpu < 0) && inherit;
        attr->mmap              = 1;
 
-       for (thread_index = 0; thread_index < thread_num; thread_index++) {
+       for (thread_index = 0; thread_index < threads->nr; thread_index++) {
 try_again:
-               fd[i][counter][thread_index] = sys_perf_event_open(attr,
-                               all_tids[thread_index], cpu, group_fd, 0);
+               FD(evsel, i, thread_index) = sys_perf_event_open(attr,
+                               threads->map[thread_index], cpu, group_fd, 0);
 
-               if (fd[i][counter][thread_index] < 0) {
+               if (FD(evsel, i, thread_index) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES)
-                               die("No permission - are you root?\n");
+                               die("Permission error - are you root?\n"
+                                       "\t Consider tweaking"
+                                       " /proc/sys/kernel/perf_event_paranoid.\n");
                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
@@ -1231,30 +1263,30 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
-                                       fd[i][counter][thread_index], strerror(err));
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
+                                       FD(evsel, i, thread_index), strerror(err));
                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        exit(-1);
                }
-               assert(fd[i][counter][thread_index] >= 0);
-               fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK);
+               assert(FD(evsel, i, thread_index) >= 0);
+               fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
 
                /*
                 * First counter acts as the group leader:
                 */
                if (group && group_fd == -1)
-                       group_fd = fd[i][counter][thread_index];
+                       group_fd = FD(evsel, i, thread_index);
 
-               event_array[nr_poll].fd = fd[i][counter][thread_index];
+               event_array[nr_poll].fd = FD(evsel, i, thread_index);
                event_array[nr_poll].events = POLLIN;
                nr_poll++;
 
-               mmap_array[i][counter][thread_index].counter = counter;
-               mmap_array[i][counter][thread_index].prev = 0;
-               mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1;
-               mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
-                               PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0);
-               if (mmap_array[i][counter][thread_index].base == MAP_FAILED)
+               mm = xyarray__entry(mmap_array, i, thread_index);
+               mm->prev = 0;
+               mm->mask = mmap_pages*page_size - 1;
+               mm->base = mmap(NULL, (mmap_pages+1)*page_size,
+                               PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
+               if (mm->base == MAP_FAILED)
                        die("failed to mmap with %d (%s)\n", errno, strerror(errno));
        }
 }
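
start_counter() keeps the group-leader convention: the first fd successfully opened is remembered in group_fd and passed to every subsequent open, so all counters in the group get scheduled onto the PMU together. For reference, a standalone sketch of the raw syscall wrapper these fds come from; tools/perf carries an equivalent in perf.h:

        #include <linux/perf_event.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                       int cpu, int group_fd, unsigned long flags)
        {
                /* group_fd == -1 creates a group leader; passing the
                 * leader's fd attaches this counter to its group */
                return syscall(__NR_perf_event_open, attr, pid, cpu,
                               group_fd, flags);
        }
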
@@ -1262,13 +1294,13 @@ try_again:
 static int __cmd_top(void)
 {
        pthread_t thread;
-       int i, counter;
-       int ret;
+       struct perf_evsel *counter;
+       int i, ret;
        /*
         * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
         * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
         */
-       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false);
+       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
        if (session == NULL)
                return -ENOMEM;
 
@@ -1277,9 +1309,9 @@ static int __cmd_top(void)
        else
                event__synthesize_threads(event__process, session);
 
-       for (i = 0; i < nr_cpus; i++) {
+       for (i = 0; i < cpus->nr; i++) {
                group_fd = -1;
-               for (counter = 0; counter < nr_counters; counter++)
+               list_for_each_entry(counter, &evsel_list, node)
                        start_counter(i, counter);
        }
 
@@ -1368,8 +1400,8 @@ static const struct option options[] = {
 
 int cmd_top(int argc, const char **argv, const char *prefix __used)
 {
-       int counter;
-       int i,j;
+       struct perf_evsel *pos;
+       int status = -ENOMEM;
 
        page_size = sysconf(_SC_PAGE_SIZE);
 
@@ -1377,34 +1409,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
        if (argc)
                usage_with_options(top_usage, options);
 
-       if (target_pid != -1) {
+       if (target_pid != -1)
                target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                               target_pid);
-                       usage_with_options(top_usage, options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       return -ENOMEM;
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problem finding threads to monitor\n");
+               usage_with_options(top_usage, options);
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       mmap_array[i][j] = zalloc(
-                               sizeof(struct mmap_data)*thread_num);
-                       if (!fd[i][j] || !mmap_array[i][j])
-                               return -ENOMEM;
-               }
-       }
-       event_array = malloc(
-               sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
+       event_array = malloc((sizeof(struct pollfd) *
+                             MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
        if (!event_array)
                return -ENOMEM;
 
@@ -1415,15 +1430,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                cpu_list = NULL;
        }
 
-       if (!nr_counters)
-               nr_counters = 1;
-
-       symbol_conf.priv_size = (sizeof(struct sym_entry) +
-                                (nr_counters + 1) * sizeof(unsigned long));
-
-       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
-       if (symbol__init() < 0)
-               return -1;
+       if (!nr_counters && perf_evsel_list__create_default() < 0) {
+               pr_err("Not enough memory for event selector list\n");
+               return -ENOMEM;
+       }
 
        if (delay_secs < 1)
                delay_secs = 1;
@@ -1440,23 +1450,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                exit(EXIT_FAILURE);
        }
 
-       /*
-        * Fill in the ones not specifically initialized via -c:
-        */
-       for (counter = 0; counter < nr_counters; counter++) {
-               if (attrs[counter].sample_period)
+       if (target_tid != -1)
+               cpus = cpu_map__dummy_new();
+       else
+               cpus = cpu_map__new(cpu_list);
+
+       if (cpus == NULL)
+               usage_with_options(top_usage, options);
+
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
+                   perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
+               /*
+                * Fill in the ones not specifically initialized via -c:
+                */
+               if (pos->attr.sample_period)
                        continue;
 
-               attrs[counter].sample_period = default_interval;
+               pos->attr.sample_period = default_interval;
        }
 
-       if (target_tid != -1)
-               nr_cpus = 1;
-       else
-               nr_cpus = read_cpu_map(cpu_list);
+       symbol_conf.priv_size = (sizeof(struct sym_entry) +
+                                (nr_counters + 1) * sizeof(unsigned long));
 
-       if (nr_cpus < 1)
-               usage_with_options(top_usage, options);
+       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+       if (symbol__init() < 0)
+               return -1;
 
        get_term_dimensions(&winsize);
        if (print_entries == 0) {
@@ -1464,5 +1484,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                signal(SIGWINCH, sig_winch_handler);
        }
 
-       return __cmd_top();
+       status = __cmd_top();
+out_free_fd:
+       list_for_each_entry(pos, &evsel_list, node)
+               perf_evsel__free_mmap(pos);
+
+       return status;
 }
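
As in stat, every indexed "for (counter = 0; counter < nr_counters; counter++)" loop in top becomes a walk of evsel_list. A minimal sketch of the pattern, assuming the kernel-style list.h that tools/perf ships under util/include:

        #include <linux/list.h>  /* tools/perf: util/include/linux/list.h */

        struct perf_evsel {
                struct list_head node;  /* link in evsel_list          */
                int idx;                /* registration order, for UI  */
                /* attr, fd, counts, priv ... elided */
        };

        static LIST_HEAD(evsel_list);

        static void for_each_counter(void (*fn)(struct perf_evsel *))
        {
                struct perf_evsel *pos;

                list_for_each_entry(pos, &evsel_list, node)
                        fn(pos);        /* callback per configured event */
        }
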
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
deleted file mode 100644 (file)
index 86cfe38..0000000
+++ /dev/null
@@ -1,826 +0,0 @@
-#include "builtin.h"
-
-#include "perf.h"
-#include "util/cache.h"
-#include "util/debug.h"
-#include "util/exec_cmd.h"
-#include "util/header.h"
-#include "util/parse-options.h"
-#include "util/session.h"
-#include "util/symbol.h"
-#include "util/thread.h"
-#include "util/trace-event.h"
-#include "util/parse-options.h"
-#include "util/util.h"
-
-static char const              *script_name;
-static char const              *generate_script_lang;
-static bool                    debug_mode;
-static u64                     last_timestamp;
-static u64                     nr_unordered;
-extern const struct option     record_options[];
-
-static int default_start_script(const char *script __unused,
-                               int argc __unused,
-                               const char **argv __unused)
-{
-       return 0;
-}
-
-static int default_stop_script(void)
-{
-       return 0;
-}
-
-static int default_generate_script(const char *outfile __unused)
-{
-       return 0;
-}
-
-static struct scripting_ops default_scripting_ops = {
-       .start_script           = default_start_script,
-       .stop_script            = default_stop_script,
-       .process_event          = print_event,
-       .generate_script        = default_generate_script,
-};
-
-static struct scripting_ops    *scripting_ops;
-
-static void setup_scripting(void)
-{
-       setup_perl_scripting();
-       setup_python_scripting();
-
-       scripting_ops = &default_scripting_ops;
-}
-
-static int cleanup_scripting(void)
-{
-       pr_debug("\nperf trace script stopped\n");
-
-       return scripting_ops->stop_script();
-}
-
-static char const              *input_name = "perf.data";
-
-static int process_sample_event(event_t *event, struct perf_session *session)
-{
-       struct sample_data data;
-       struct thread *thread;
-
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
-       if (thread == NULL) {
-               pr_debug("problem processing %d event, skipping it.\n",
-                        event->header.type);
-               return -1;
-       }
-
-       if (session->sample_type & PERF_SAMPLE_RAW) {
-               if (debug_mode) {
-                       if (data.time < last_timestamp) {
-                               pr_err("Samples misordered, previous: %llu "
-                                       "this: %llu\n", last_timestamp,
-                                       data.time);
-                               nr_unordered++;
-                       }
-                       last_timestamp = data.time;
-                       return 0;
-               }
-               /*
-                * FIXME: better resolve from pid from the struct trace_entry
-                * field, although it should be the same than this perf
-                * event pid
-                */
-               scripting_ops->process_event(data.cpu, data.raw_data,
-                                            data.raw_size,
-                                            data.time, thread->comm);
-       }
-
-       session->hists.stats.total_period += data.period;
-       return 0;
-}
-
-static u64 nr_lost;
-
-static int process_lost_event(event_t *event, struct perf_session *session __used)
-{
-       nr_lost += event->lost.lost;
-
-       return 0;
-}
-
-static struct perf_event_ops event_ops = {
-       .sample = process_sample_event,
-       .comm   = event__process_comm,
-       .attr   = event__process_attr,
-       .event_type = event__process_event_type,
-       .tracing_data = event__process_tracing_data,
-       .build_id = event__process_build_id,
-       .lost = process_lost_event,
-       .ordered_samples = true,
-};
-
-extern volatile int session_done;
-
-static void sig_handler(int sig __unused)
-{
-       session_done = 1;
-}
-
-static int __cmd_trace(struct perf_session *session)
-{
-       int ret;
-
-       signal(SIGINT, sig_handler);
-
-       ret = perf_session__process_events(session, &event_ops);
-
-       if (debug_mode) {
-               pr_err("Misordered timestamps: %llu\n", nr_unordered);
-               pr_err("Lost events: %llu\n", nr_lost);
-       }
-
-       return ret;
-}
-
-struct script_spec {
-       struct list_head        node;
-       struct scripting_ops    *ops;
-       char                    spec[0];
-};
-
-LIST_HEAD(script_specs);
-
-static struct script_spec *script_spec__new(const char *spec,
-                                           struct scripting_ops *ops)
-{
-       struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
-
-       if (s != NULL) {
-               strcpy(s->spec, spec);
-               s->ops = ops;
-       }
-
-       return s;
-}
-
-static void script_spec__delete(struct script_spec *s)
-{
-       free(s->spec);
-       free(s);
-}
-
-static void script_spec__add(struct script_spec *s)
-{
-       list_add_tail(&s->node, &script_specs);
-}
-
-static struct script_spec *script_spec__find(const char *spec)
-{
-       struct script_spec *s;
-
-       list_for_each_entry(s, &script_specs, node)
-               if (strcasecmp(s->spec, spec) == 0)
-                       return s;
-       return NULL;
-}
-
-static struct script_spec *script_spec__findnew(const char *spec,
-                                               struct scripting_ops *ops)
-{
-       struct script_spec *s = script_spec__find(spec);
-
-       if (s)
-               return s;
-
-       s = script_spec__new(spec, ops);
-       if (!s)
-               goto out_delete_spec;
-
-       script_spec__add(s);
-
-       return s;
-
-out_delete_spec:
-       script_spec__delete(s);
-
-       return NULL;
-}
-
-int script_spec_register(const char *spec, struct scripting_ops *ops)
-{
-       struct script_spec *s;
-
-       s = script_spec__find(spec);
-       if (s)
-               return -1;
-
-       s = script_spec__findnew(spec, ops);
-       if (!s)
-               return -1;
-
-       return 0;
-}
-
-static struct scripting_ops *script_spec__lookup(const char *spec)
-{
-       struct script_spec *s = script_spec__find(spec);
-       if (!s)
-               return NULL;
-
-       return s->ops;
-}
-
-static void list_available_languages(void)
-{
-       struct script_spec *s;
-
-       fprintf(stderr, "\n");
-       fprintf(stderr, "Scripting language extensions (used in "
-               "perf trace -s [spec:]script.[spec]):\n\n");
-
-       list_for_each_entry(s, &script_specs, node)
-               fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
-
-       fprintf(stderr, "\n");
-}
-
-static int parse_scriptname(const struct option *opt __used,
-                           const char *str, int unset __used)
-{
-       char spec[PATH_MAX];
-       const char *script, *ext;
-       int len;
-
-       if (strcmp(str, "lang") == 0) {
-               list_available_languages();
-               exit(0);
-       }
-
-       script = strchr(str, ':');
-       if (script) {
-               len = script - str;
-               if (len >= PATH_MAX) {
-                       fprintf(stderr, "invalid language specifier");
-                       return -1;
-               }
-               strncpy(spec, str, len);
-               spec[len] = '\0';
-               scripting_ops = script_spec__lookup(spec);
-               if (!scripting_ops) {
-                       fprintf(stderr, "invalid language specifier");
-                       return -1;
-               }
-               script++;
-       } else {
-               script = str;
-               ext = strrchr(script, '.');
-               if (!ext) {
-                       fprintf(stderr, "invalid script extension");
-                       return -1;
-               }
-               scripting_ops = script_spec__lookup(++ext);
-               if (!scripting_ops) {
-                       fprintf(stderr, "invalid script extension");
-                       return -1;
-               }
-       }
-
-       script_name = strdup(script);
-
-       return 0;
-}
-
-#define for_each_lang(scripts_dir, lang_dirent, lang_next)             \
-       while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&     \
-              lang_next)                                               \
-               if (lang_dirent.d_type == DT_DIR &&                     \
-                   (strcmp(lang_dirent.d_name, ".")) &&                \
-                   (strcmp(lang_dirent.d_name, "..")))
-
-#define for_each_script(lang_dir, script_dirent, script_next)          \
-       while (!readdir_r(lang_dir, &script_dirent, &script_next) &&    \
-              script_next)                                             \
-               if (script_dirent.d_type != DT_DIR)
-
-
-#define RECORD_SUFFIX                  "-record"
-#define REPORT_SUFFIX                  "-report"
-
-struct script_desc {
-       struct list_head        node;
-       char                    *name;
-       char                    *half_liner;
-       char                    *args;
-};
-
-LIST_HEAD(script_descs);
-
-static struct script_desc *script_desc__new(const char *name)
-{
-       struct script_desc *s = zalloc(sizeof(*s));
-
-       if (s != NULL && name)
-               s->name = strdup(name);
-
-       return s;
-}
-
-static void script_desc__delete(struct script_desc *s)
-{
-       free(s->name);
-       free(s->half_liner);
-       free(s->args);
-       free(s);
-}
-
-static void script_desc__add(struct script_desc *s)
-{
-       list_add_tail(&s->node, &script_descs);
-}
-
-static struct script_desc *script_desc__find(const char *name)
-{
-       struct script_desc *s;
-
-       list_for_each_entry(s, &script_descs, node)
-               if (strcasecmp(s->name, name) == 0)
-                       return s;
-       return NULL;
-}
-
-static struct script_desc *script_desc__findnew(const char *name)
-{
-       struct script_desc *s = script_desc__find(name);
-
-       if (s)
-               return s;
-
-       s = script_desc__new(name);
-       if (!s)
-               goto out_delete_desc;
-
-       script_desc__add(s);
-
-       return s;
-
-out_delete_desc:
-       script_desc__delete(s);
-
-       return NULL;
-}
-
-static char *ends_with(char *str, const char *suffix)
-{
-       size_t suffix_len = strlen(suffix);
-       char *p = str;
-
-       if (strlen(str) > suffix_len) {
-               p = str + strlen(str) - suffix_len;
-               if (!strncmp(p, suffix, suffix_len))
-                       return p;
-       }
-
-       return NULL;
-}
-
-static char *ltrim(char *str)
-{
-       int len = strlen(str);
-
-       while (len && isspace(*str)) {
-               len--;
-               str++;
-       }
-
-       return str;
-}
-
-static int read_script_info(struct script_desc *desc, const char *filename)
-{
-       char line[BUFSIZ], *p;
-       FILE *fp;
-
-       fp = fopen(filename, "r");
-       if (!fp)
-               return -1;
-
-       while (fgets(line, sizeof(line), fp)) {
-               p = ltrim(line);
-               if (strlen(p) == 0)
-                       continue;
-               if (*p != '#')
-                       continue;
-               p++;
-               if (strlen(p) && *p == '!')
-                       continue;
-
-               p = ltrim(p);
-               if (strlen(p) && p[strlen(p) - 1] == '\n')
-                       p[strlen(p) - 1] = '\0';
-
-               if (!strncmp(p, "description:", strlen("description:"))) {
-                       p += strlen("description:");
-                       desc->half_liner = strdup(ltrim(p));
-                       continue;
-               }
-
-               if (!strncmp(p, "args:", strlen("args:"))) {
-                       p += strlen("args:");
-                       desc->args = strdup(ltrim(p));
-                       continue;
-               }
-       }
-
-       fclose(fp);
-
-       return 0;
-}
-
-static int list_available_scripts(const struct option *opt __used,
-                                 const char *s __used, int unset __used)
-{
-       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
-       char scripts_path[MAXPATHLEN];
-       DIR *scripts_dir, *lang_dir;
-       char script_path[MAXPATHLEN];
-       char lang_path[MAXPATHLEN];
-       struct script_desc *desc;
-       char first_half[BUFSIZ];
-       char *script_root;
-       char *str;
-
-       snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
-
-       scripts_dir = opendir(scripts_path);
-       if (!scripts_dir)
-               return -1;
-
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
-               snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
-                        lang_dirent.d_name);
-               lang_dir = opendir(lang_path);
-               if (!lang_dir)
-                       continue;
-
-               for_each_script(lang_dir, script_dirent, script_next) {
-                       script_root = strdup(script_dirent.d_name);
-                       str = ends_with(script_root, REPORT_SUFFIX);
-                       if (str) {
-                               *str = '\0';
-                               desc = script_desc__findnew(script_root);
-                               snprintf(script_path, MAXPATHLEN, "%s/%s",
-                                        lang_path, script_dirent.d_name);
-                               read_script_info(desc, script_path);
-                       }
-                       free(script_root);
-               }
-       }
-
-       fprintf(stdout, "List of available trace scripts:\n");
-       list_for_each_entry(desc, &script_descs, node) {
-               sprintf(first_half, "%s %s", desc->name,
-                       desc->args ? desc->args : "");
-               fprintf(stdout, "  %-36s %s\n", first_half,
-                       desc->half_liner ? desc->half_liner : "");
-       }
-
-       exit(0);
-}
-
-static char *get_script_path(const char *script_root, const char *suffix)
-{
-       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
-       char scripts_path[MAXPATHLEN];
-       char script_path[MAXPATHLEN];
-       DIR *scripts_dir, *lang_dir;
-       char lang_path[MAXPATHLEN];
-       char *str, *__script_root;
-       char *path = NULL;
-
-       snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
-
-       scripts_dir = opendir(scripts_path);
-       if (!scripts_dir)
-               return NULL;
-
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
-               snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
-                        lang_dirent.d_name);
-               lang_dir = opendir(lang_path);
-               if (!lang_dir)
-                       continue;
-
-               for_each_script(lang_dir, script_dirent, script_next) {
-                       __script_root = strdup(script_dirent.d_name);
-                       str = ends_with(__script_root, suffix);
-                       if (str) {
-                               *str = '\0';
-                               if (strcmp(__script_root, script_root))
-                                       continue;
-                               snprintf(script_path, MAXPATHLEN, "%s/%s",
-                                        lang_path, script_dirent.d_name);
-                               path = strdup(script_path);
-                               free(__script_root);
-                               break;
-                       }
-                       free(__script_root);
-               }
-       }
-
-       return path;
-}
-
-static bool is_top_script(const char *script_path)
-{
-       return ends_with((char *)script_path, "top") == NULL ? false : true;
-}
-
-static int has_required_arg(char *script_path)
-{
-       struct script_desc *desc;
-       int n_args = 0;
-       char *p;
-
-       desc = script_desc__new(NULL);
-
-       if (read_script_info(desc, script_path))
-               goto out;
-
-       if (!desc->args)
-               goto out;
-
-       for (p = desc->args; *p; p++)
-               if (*p == '<')
-                       n_args++;
-out:
-       script_desc__delete(desc);
-
-       return n_args;
-}
-
-static const char * const trace_usage[] = {
-       "perf trace [<options>]",
-       "perf trace [<options>] record <script> [<record-options>] <command>",
-       "perf trace [<options>] report <script> [script-args]",
-       "perf trace [<options>] <script> [<record-options>] <command>",
-       "perf trace [<options>] <top-script> [script-args]",
-       NULL
-};
-
-static const struct option options[] = {
-       OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-                   "dump raw trace in ASCII"),
-       OPT_INCR('v', "verbose", &verbose,
-                   "be more verbose (show symbol address, etc)"),
-       OPT_BOOLEAN('L', "Latency", &latency_format,
-                   "show latency attributes (irqs/preemption disabled, etc)"),
-       OPT_CALLBACK_NOOPT('l', "list", NULL, NULL, "list available scripts",
-                          list_available_scripts),
-       OPT_CALLBACK('s', "script", NULL, "name",
-                    "script file name (lang:script name, script name, or *)",
-                    parse_scriptname),
-       OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
-                  "generate perf-trace.xx script in specified language"),
-       OPT_STRING('i', "input", &input_name, "file",
-                   "input file name"),
-       OPT_BOOLEAN('d', "debug-mode", &debug_mode,
-                  "do various checks like samples ordering and lost events"),
-
-       OPT_END()
-};
-
-static bool have_cmd(int argc, const char **argv)
-{
-       char **__argv = malloc(sizeof(const char *) * argc);
-
-       if (!__argv)
-               die("malloc");
-       memcpy(__argv, argv, sizeof(const char *) * argc);
-       argc = parse_options(argc, (const char **)__argv, record_options,
-                            NULL, PARSE_OPT_STOP_AT_NON_OPTION);
-       free(__argv);
-
-       return argc != 0;
-}
-
-int cmd_trace(int argc, const char **argv, const char *prefix __used)
-{
-       char *rec_script_path = NULL;
-       char *rep_script_path = NULL;
-       struct perf_session *session;
-       char *script_path = NULL;
-       const char **__argv;
-       bool system_wide;
-       int i, j, err;
-
-       setup_scripting();
-
-       argc = parse_options(argc, argv, options, trace_usage,
-                            PARSE_OPT_STOP_AT_NON_OPTION);
-
-       if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
-               rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
-               if (!rec_script_path)
-                       return cmd_record(argc, argv, NULL);
-       }
-
-       if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) {
-               rep_script_path = get_script_path(argv[1], REPORT_SUFFIX);
-               if (!rep_script_path) {
-                       fprintf(stderr,
-                               "Please specify a valid report script"
-                               "(see 'perf trace -l' for listing)\n");
-                       return -1;
-               }
-       }
-
-       /* make sure PERF_EXEC_PATH is set for scripts */
-       perf_set_argv_exec_path(perf_exec_path());
-
-       if (argc && !script_name && !rec_script_path && !rep_script_path) {
-               int live_pipe[2];
-               int rep_args;
-               pid_t pid;
-
-               rec_script_path = get_script_path(argv[0], RECORD_SUFFIX);
-               rep_script_path = get_script_path(argv[0], REPORT_SUFFIX);
-
-               if (!rec_script_path && !rep_script_path) {
-                       fprintf(stderr, " Couldn't find script %s\n\n See perf"
-                               " trace -l for available scripts.\n", argv[0]);
-                       usage_with_options(trace_usage, options);
-               }
-
-               if (is_top_script(argv[0])) {
-                       rep_args = argc - 1;
-               } else {
-                       int rec_args;
-
-                       rep_args = has_required_arg(rep_script_path);
-                       rec_args = (argc - 1) - rep_args;
-                       if (rec_args < 0) {
-                               fprintf(stderr, " %s script requires options."
-                                       "\n\n See perf trace -l for available "
-                                       "scripts and options.\n", argv[0]);
-                               usage_with_options(trace_usage, options);
-                       }
-               }
-
-               if (pipe(live_pipe) < 0) {
-                       perror("failed to create pipe");
-                       exit(-1);
-               }
-
-               pid = fork();
-               if (pid < 0) {
-                       perror("failed to fork");
-                       exit(-1);
-               }
-
-               if (!pid) {
-                       system_wide = true;
-                       j = 0;
-
-                       dup2(live_pipe[1], 1);
-                       close(live_pipe[0]);
-
-                       if (!is_top_script(argv[0]))
-                               system_wide = !have_cmd(argc - rep_args,
-                                                       &argv[rep_args]);
-
-                       __argv = malloc((argc + 6) * sizeof(const char *));
-                       if (!__argv)
-                               die("malloc");
-
-                       __argv[j++] = "/bin/sh";
-                       __argv[j++] = rec_script_path;
-                       if (system_wide)
-                               __argv[j++] = "-a";
-                       __argv[j++] = "-q";
-                       __argv[j++] = "-o";
-                       __argv[j++] = "-";
-                       for (i = rep_args + 1; i < argc; i++)
-                               __argv[j++] = argv[i];
-                       __argv[j++] = NULL;
-
-                       execvp("/bin/sh", (char **)__argv);
-                       free(__argv);
-                       exit(-1);
-               }
-
-               dup2(live_pipe[0], 0);
-               close(live_pipe[1]);
-
-               __argv = malloc((argc + 4) * sizeof(const char *));
-               if (!__argv)
-                       die("malloc");
-               j = 0;
-               __argv[j++] = "/bin/sh";
-               __argv[j++] = rep_script_path;
-               for (i = 1; i < rep_args + 1; i++)
-                       __argv[j++] = argv[i];
-               __argv[j++] = "-i";
-               __argv[j++] = "-";
-               __argv[j++] = NULL;
-
-               execvp("/bin/sh", (char **)__argv);
-               free(__argv);
-               exit(-1);
-       }
-
-       if (rec_script_path)
-               script_path = rec_script_path;
-       if (rep_script_path)
-               script_path = rep_script_path;
-
-       if (script_path) {
-               system_wide = false;
-               j = 0;
-
-               if (rec_script_path)
-                       system_wide = !have_cmd(argc - 1, &argv[1]);
-
-               __argv = malloc((argc + 2) * sizeof(const char *));
-               if (!__argv)
-                       die("malloc");
-               __argv[j++] = "/bin/sh";
-               __argv[j++] = script_path;
-               if (system_wide)
-                       __argv[j++] = "-a";
-               for (i = 2; i < argc; i++)
-                       __argv[j++] = argv[i];
-               __argv[j++] = NULL;
-
-               execvp("/bin/sh", (char **)__argv);
-               free(__argv);
-               exit(-1);
-       }
-
-       if (symbol__init() < 0)
-               return -1;
-       if (!script_name)
-               setup_pager();
-
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
-       if (session == NULL)
-               return -ENOMEM;
-
-       if (strcmp(input_name, "-") &&
-           !perf_session__has_traces(session, "record -R"))
-               return -EINVAL;
-
-       if (generate_script_lang) {
-               struct stat perf_stat;
-
-               int input = open(input_name, O_RDONLY);
-               if (input < 0) {
-                       perror("failed to open file");
-                       exit(-1);
-               }
-
-               err = fstat(input, &perf_stat);
-               if (err < 0) {
-                       perror("failed to stat file");
-                       exit(-1);
-               }
-
-               if (!perf_stat.st_size) {
-                       fprintf(stderr, "zero-sized file, nothing to do!\n");
-                       exit(0);
-               }
-
-               scripting_ops = script_spec__lookup(generate_script_lang);
-               if (!scripting_ops) {
-                       fprintf(stderr, "invalid language specifier");
-                       return -1;
-               }
-
-               err = scripting_ops->generate_script("perf-trace");
-               goto out;
-       }
-
-       if (script_name) {
-               err = scripting_ops->start_script(script_name, argc, argv);
-               if (err)
-                       goto out;
-               pr_debug("perf trace started with script %s\n\n", script_name);
-       }
-
-       err = __cmd_trace(session);
-
-       perf_session__delete(session);
-       cleanup_scripting();
-out:
-       return err;
-}
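
The block removed above was builtin-trace.c's live mode: record and report run as two processes joined by a pipe. Below is a minimal self-contained sketch of that same fork/dup2/exec plumbing, with placeholder script names ("record.sh", "report.sh") rather than perf's real paths:

/* Sketch only, not perf's actual code: the child execs the record side
 * with stdout on the pipe, the parent execs the report side with stdin
 * on it. "record.sh" and "report.sh" are placeholder names. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int live_pipe[2];
	pid_t pid;

	if (pipe(live_pipe) < 0) {
		perror("pipe");
		return 1;
	}

	pid = fork();
	if (pid < 0) {
		perror("fork");
		return 1;
	}

	if (pid == 0) {
		/* child: record side writes the event stream to the pipe */
		dup2(live_pipe[1], STDOUT_FILENO);
		close(live_pipe[0]);
		execlp("/bin/sh", "sh", "record.sh", "-o", "-", (char *)NULL);
		_exit(127);
	}

	/* parent: report side reads the event stream from the pipe */
	dup2(live_pipe[0], STDIN_FILENO);
	close(live_pipe[1]);
	execlp("/bin/sh", "sh", "report.sh", "-i", "-", (char *)NULL);
	perror("execlp");
	return 1;
}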
index 921245b28583e448cce49008b2482e0f59193454..c7798c7f24ed737f03a4376485be5a2e14b3aef4 100644 (file)
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_timechart(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
+extern int cmd_script(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
index 949d77fc0b9718d812a8906883b7a89ef99c49f0..16b5088cf8f4bbb2259aee8ad90483cd926a0935 100644 (file)
@@ -16,7 +16,7 @@ perf-report                   mainporcelain common
 perf-stat                      mainporcelain common
 perf-timechart                 mainporcelain common
 perf-top                       mainporcelain common
-perf-trace                     mainporcelain common
+perf-script                    mainporcelain common
 perf-probe                     mainporcelain common
 perf-kmem                      mainporcelain common
 perf-lock                      mainporcelain common
index b253db634f04b7e8ddfddd1cc33bb3ce8343a49a..b041ca67a2cbdee01c87ff92ec21deda82ae1b5c 100644 (file)
@@ -9,8 +9,8 @@ endef
 ifndef NO_DWARF
 define SOURCE_DWARF
 #include <dwarf.h>
-#include <libdw.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
 #ifndef _ELFUTILS_PREREQ
 #error
 #endif
index cdd6c03f1e14c132e550b85e07b22e7621a710d2..5b1ecd66bb36a053b6427020f9aa3361d4ec1265 100644 (file)
@@ -286,6 +286,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
        status = p->fn(argc, argv, prefix);
        exit_browser(status);
 
+       perf_evsel_list__delete();
+
        if (status)
                return status & 0xff;
 
@@ -323,7 +325,7 @@ static void handle_internal_command(int argc, const char **argv)
                { "top",        cmd_top,        0 },
                { "annotate",   cmd_annotate,   0 },
                { "version",    cmd_version,    0 },
-               { "trace",      cmd_trace,      0 },
+               { "script",     cmd_script,     0 },
                { "sched",      cmd_sched,      0 },
                { "probe",      cmd_probe,      0 },
                { "kmem",       cmd_kmem,       0 },
index 01a64ad693f2a7c4ac8600e8d37f9a90ffad1b00..790ceba6ad3f4a4102a1affa81a637ef774d7d43 100644 (file)
@@ -8,7 +8,7 @@
 
 #line 1 "Context.xs"
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
index 549cf0467d309eda5be9c0faee7897a5608371b4..c1e2ed1ed34e4e16e3398acd12f3e5723a81ddee 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
index 9a970763079144666b9bebb3f6d626bd7009f298..2f0c7f3043ee5d992727b50f11c404d33cfb009e 100644 (file)
@@ -1,7 +1,7 @@
 Perf-Trace-Util version 0.01
 ============================
 
-This module contains utility functions for use with perf trace.
+This module contains utility functions for use with perf script.
 
 Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
 that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@ After you do that:
 
 INSTALLATION
 
-Building perf with perf trace Perl scripting should install this
+Building perf with perf script Perl scripting should install this
 module in the right place.
 
 You should make sure libperl and ExtUtils/Embed.pm are installed first
index 6c7f3659cb1769ca8d40bd19816d54be94dd47ca..4e2f6039ac920f60192a7cee033339afbdf26042 100644 (file)
@@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf.
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 9df376a9f62971e355de96b14324c52fa9b469ea..9158458d3eeb118c35357a29af763f533e370247 100644 (file)
@@ -163,7 +163,7 @@ sub dump_symbolic_fields
 __END__
 =head1 NAME
 
-Perf::Trace::Core - Perl extension for perf trace
+Perf::Trace::Core - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index d94b40c8ac857227516b6f408bf5b13a9fea03cd..053500114625515d7745757178274e097c0d6aa2 100644 (file)
@@ -65,7 +65,7 @@ sub clear_term
 __END__
 =head1 NAME
 
-Perf::Trace::Util - Perl extension for perf trace
+Perf::Trace::Util - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 4028d92dc4ae6602927d3c82c02f3973506d995e..9f83cc1ad8ba253acabff31a32987566976e6d7c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
index ba25f4d41fb02a1d40303ebcdb8ba8f17ff8470d..77200b3f31003c7bd8c3a137106f461223b67a43 100644 (file)
@@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then
 fi
 comm=$1
 shift
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
index 641a3f5d085c6148e9437e10704f6953d7d1eca5..a27b9f311f959a626d9e481f39cbb65b060d743f 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide r/w activity
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
index 4918dba77021e676fdfeaf6c7d3e90105979ccc8..83e11ec2e190988c142aab5d4644778b812b9447 100644 (file)
@@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
index 49052ebcb6326d8aa13ea309994d965ca045f58b..889e8130cca55c7235ae749c83c5a9aedb92d4fd 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide min/max/avg wakeup latency
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
index df0c65f4ca93de35b07bbe9e24f5a966d8b75ea8..6d91411d248caa1a0f6ade3ee39645cfb51b631d 100644 (file)
@@ -1,7 +1,3 @@
 #!/bin/bash
 # description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
-
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
index 4e7dc0a407a5fbf65d0bcfab434e6dfad23f0733..4e7076c2061610044f766dddff136bf785166256 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g perl
+# perf script event handlers, generated by perf script -g perl
 # (c) 2009, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 
index 2a39097687b9f70dffb2fe83f4477539ff8e98b4..74844ee2be3ef691ce9fd1c7129a158b5080a975 100644 (file)
@@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib";
 use Perf::Trace::Core;
 use Perf::Trace::Util;
 
-my $usage = "perf trace -s rw-by-file.pl <comm>\n";
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
 
 my $for_comm = shift or die $usage;
 
index b84b12699b70ba0731bdbfdd4115d62558990184..a8eaff5119e09fa953626d0ed58ddfabae18bb00 100644 (file)
@@ -10,7 +10,7 @@
 #     workqueue:workqueue_destruction -e workqueue:workqueue_execution
 #     -e workqueue:workqueue_insertion
 #
-#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+#   perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
 
 use 5.010000;
 use strict;
index 957085dd5d8d1a2ff17ee4bb6ad3c3160f8df508..315067b8f5522ae9cafbef1df506aca814e93012 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.c.  Python interfaces for perf trace.
+ * Context.c.  Python interfaces for perf script.
  *
  * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
  *
index aad7525bca1dc5a45ca72cd4be79eec7411a2bb0..de7211e4fa471ac0a0475f15097c0e175fdf4da9 100644 (file)
@@ -1,4 +1,4 @@
-# Core.py - Python extension for perf trace, core functions
+# Core.py - Python extension for perf script, core functions
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index ae9a56e43e05e37981774e9dc9ec3d0ed59f6ba2..fdd92f699055713e2d1fec1c99a61489e5812a64 100644 (file)
@@ -1,4 +1,4 @@
-# SchedGui.py - Python extension for perf trace, basic GUI code for
+# SchedGui.py - Python extension for perf script, basic GUI code for
 #              traces drawing and overview.
 #
 # Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
index 13cc02b5893a7ee0a248b040eddc32cdc675a258..15c8400240fd9029ae34fca077304337d9c75ca6 100644 (file)
@@ -1,4 +1,4 @@
-# Util.py - Python extension for perf trace, miscellaneous utility code
+# Util.py - Python extension for perf script, miscellaneous utility code
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index 03587021463d4ef6c7d25b4d0a852178fded5a86..fda5096d0cbf81a29792819c9648a43f89497d3c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
index c8268138fb7e3e6d431be07bada51abed3022294..6c44271091abbb977b2a0ef725dea470626b1337 100644 (file)
@@ -1,4 +1,4 @@
 #!/bin/bash
 # description: futex contention measurement
 
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
index 4ad361b31249c03f10d424b842b92b6d9911acee..8f759291da86c07435a62e7fa044f8c75f9c2749 100644 (file)
@@ -2,4 +2,4 @@
 # description: display network packet flow and processing time
 # args: [tx] [rx] [dev=] [debug]
 
-perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
index df1791f07c24233c638e445d48ff3ab52955889c..68b037a1849b1aeb71ec86d21fb05af4e208fc87 100644 (file)
@@ -1,3 +1,3 @@
 #!/bin/bash
 # description: sched migration overview
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
index 36b409c05e50ac5e6f80f7b82189aaa0d2ba096a..c32db294124da91d2654302c9d8e32673ec2bc4c 100644 (file)
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
index 4eb88c9fc83ce7e99e14e8b004f71930b422d89a..16eb8d65c54335e08d1a95e7068e6d0df55be6b5 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
index cb2f9c5cf17e825972870c5c934500672e8bd15e..0f0e9d453bb48a606b3c6522a104bd16499fede8 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
index d9f7893e315c0d5aa064df04a8cc3ac870aebdad..4647a7694cf60a77835f3c80aeb79d54578df69d 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # (c) 2010, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 #
index acd7848717b35ea7c0c46ae61dc01241673f936d..85805fac41167b9e531c09f51ca21595287e08aa 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
 
 for_comm = None
 for_pid = None
index b934383c3364e63ed7bd6147bf5509867c65e7e2..74d55ec08aed5ec27867b1d74682a5a0bb320748 100644 (file)
@@ -4,7 +4,7 @@
 #
 # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
 #
-# perf trace event handlers have been generated by perf trace -g python
+# perf script event handlers have been generated by perf script -g python
 #
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
index 7a6ec2c7d8abe7bf01b660210811829f37eb572d..42c267e292fa36155f5d6b8270fb4b6943d9f6d0 100644 (file)
@@ -17,7 +17,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s sctop.py [comm] [interval]\n";
+usage = "perf script -s sctop.py [comm] [interval]\n";
 
 for_comm = None
 default_interval = 3
index d1ee3ec10cf2b911776df81df7a5d4d66a5f5fc1..c64d1c55d745b7437e4e26f77446b87567c67ed1 100644 (file)
@@ -14,7 +14,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
 
 for_comm = None
 for_pid = None
index ea183dc82d29e54a005f28648201a2219feac224..b435d3f188e84c421819802cb2efcefd62cff0d2 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts.py [comm]\n";
+usage = "perf script -s syscall-counts.py [comm]\n";
 
 for_comm = None
 
index e437edb72417ba2f12e90b810f8851caef2c24fb..deffb8c960716213124b7fd36f56edf41bf8207d 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/kernel.h>
 #include "debug.h"
 
-static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+static int build_id__mark_dso_hit(event_t *event,
+                                 struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct addr_location al;
        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int event__exit_del_thread(event_t *self, struct perf_session *session)
+static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
 
index 0f9b8d7a7d7e7d62f38c48ec8846251beb532918..3ccaa10433830503325bb7625527839f4586b93e 100644 (file)
@@ -4,32 +4,53 @@
 #include <assert.h>
 #include <stdio.h>
 
-int cpumap[MAX_NR_CPUS];
-
-static int default_cpu_map(void)
+static struct cpu_map *cpu_map__default_new(void)
 {
-       int nr_cpus, i;
+       struct cpu_map *cpus;
+       int nr_cpus;
 
        nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-       assert(nr_cpus <= MAX_NR_CPUS);
-       assert((int)nr_cpus >= 0);
+       if (nr_cpus < 0)
+               return NULL;
+
+       cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+       if (cpus != NULL) {
+               int i;
+               for (i = 0; i < nr_cpus; ++i)
+                       cpus->map[i] = i;
 
-       for (i = 0; i < nr_cpus; ++i)
-               cpumap[i] = i;
+               cpus->nr = nr_cpus;
+       }
 
-       return nr_cpus;
+       return cpus;
 }
 
-static int read_all_cpu_map(void)
+static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
 {
+       size_t payload_size = nr_cpus * sizeof(int);
+       struct cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+
+       if (cpus != NULL) {
+               cpus->nr = nr_cpus;
+               memcpy(cpus->map, tmp_cpus, payload_size);
+       }
+
+       return cpus;
+}
+
+static struct cpu_map *cpu_map__read_all_cpu_map(void)
+{
+       struct cpu_map *cpus = NULL;
        FILE *onlnf;
        int nr_cpus = 0;
+       int *tmp_cpus = NULL, *tmp;
+       int max_entries = 0;
        int n, cpu, prev;
        char sep;
 
        onlnf = fopen("/sys/devices/system/cpu/online", "r");
        if (!onlnf)
-               return default_cpu_map();
+               return cpu_map__default_new();
 
        sep = 0;
        prev = -1;
@@ -38,12 +59,28 @@ static int read_all_cpu_map(void)
                if (n <= 0)
                        break;
                if (prev >= 0) {
-                       assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS);
+                       int new_max = nr_cpus + cpu - prev - 1;
+
+                       if (new_max >= max_entries) {
+                               max_entries = new_max + MAX_NR_CPUS / 2;
+                               tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                               if (tmp == NULL)
+                                       goto out_free_tmp;
+                               tmp_cpus = tmp;
+                       }
+
                        while (++prev < cpu)
-                               cpumap[nr_cpus++] = prev;
+                               tmp_cpus[nr_cpus++] = prev;
+               }
+               if (nr_cpus == max_entries) {
+                       max_entries += MAX_NR_CPUS;
+                       tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                       if (tmp == NULL)
+                               goto out_free_tmp;
+                       tmp_cpus = tmp;
                }
-               assert (nr_cpus < MAX_NR_CPUS);
-               cpumap[nr_cpus++] = cpu;
+
+               tmp_cpus[nr_cpus++] = cpu;
                if (n == 2 && sep == '-')
                        prev = cpu;
                else
@@ -51,24 +88,31 @@ static int read_all_cpu_map(void)
                if (n == 1 || sep == '\n')
                        break;
        }
-       fclose(onlnf);
-       if (nr_cpus > 0)
-               return nr_cpus;
 
-       return default_cpu_map();
+       if (nr_cpus > 0)
+               cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+       else
+               cpus = cpu_map__default_new();
+out_free_tmp:
+       free(tmp_cpus);
+       fclose(onlnf);
+       return cpus;
 }
 
-int read_cpu_map(const char *cpu_list)
+struct cpu_map *cpu_map__new(const char *cpu_list)
 {
+       struct cpu_map *cpus = NULL;
        unsigned long start_cpu, end_cpu = 0;
        char *p = NULL;
        int i, nr_cpus = 0;
+       int *tmp_cpus = NULL, *tmp;
+       int max_entries = 0;
 
        if (!cpu_list)
-               return read_all_cpu_map();
+               return cpu_map__read_all_cpu_map();
 
        if (!isdigit(*cpu_list))
-               goto invalid;
+               goto out;
 
        while (isdigit(*cpu_list)) {
                p = NULL;
@@ -94,21 +138,42 @@ int read_cpu_map(const char *cpu_list)
                for (; start_cpu <= end_cpu; start_cpu++) {
                        /* check for duplicates */
                        for (i = 0; i < nr_cpus; i++)
-                               if (cpumap[i] == (int)start_cpu)
+                               if (tmp_cpus[i] == (int)start_cpu)
                                        goto invalid;
 
-                       assert(nr_cpus < MAX_NR_CPUS);
-                       cpumap[nr_cpus++] = (int)start_cpu;
+                       if (nr_cpus == max_entries) {
+                               max_entries += MAX_NR_CPUS;
+                               tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                               if (tmp == NULL)
+                                       goto invalid;
+                               tmp_cpus = tmp;
+                       }
+                       tmp_cpus[nr_cpus++] = (int)start_cpu;
                }
                if (*p)
                        ++p;
 
                cpu_list = p;
        }
-       if (nr_cpus > 0)
-               return nr_cpus;
 
-       return default_cpu_map();
+       if (nr_cpus > 0)
+               cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+       else
+               cpus = cpu_map__default_new();
 invalid:
-       return -1;
+       free(tmp_cpus);
+out:
+       return cpus;
+}
+
+struct cpu_map *cpu_map__dummy_new(void)
+{
+       struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+
+       if (cpus != NULL) {
+               cpus->nr = 1;
+               cpus->map[0] = -1;
+       }
+
+       return cpus;
 }
index 3e60f56e490eb10f8cf08981e703bf5699d6b20c..f7a4f42f6307fb522299ea48d1126e6d08ffda71 100644 (file)
@@ -1,7 +1,13 @@
 #ifndef __PERF_CPUMAP_H
 #define __PERF_CPUMAP_H
 
-extern int read_cpu_map(const char *cpu_list);
-extern int cpumap[];
+struct cpu_map {
+       int nr;
+       int map[];
+};
+
+struct cpu_map *cpu_map__new(const char *cpu_list);
+struct cpu_map *cpu_map__dummy_new(void);
+void cpu_map__delete(struct cpu_map *map);
 
 #endif /* __PERF_CPUMAP_H */
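
A hypothetical caller of the refactored cpu_map API, assuming it is built inside the perf tree and linked with the util/ objects; cpu_map__delete() is used as declared above:

/* Sketch under the assumptions stated above. */
#include <stdio.h>
#include "cpumap.h"

int main(void)
{
	/* NULL would mean "all online CPUs"; a list such as "0-3,6" also works */
	struct cpu_map *cpus = cpu_map__new("0-3,6");
	int i;

	if (cpus == NULL)
		return 1;

	for (i = 0; i < cpus->nr; i++)
		printf("cpu %d\n", cpus->map[i]);

	cpu_map__delete(cpus);
	return 0;
}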
index c8d81b00089d6d9d26ca431aea91453a86ef2351..01bbe8ecec3f7eda9088e9e59ad78ea53b7b5ce6 100644 (file)
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
        return ret;
 }
 
-static int dump_printf_color(const char *fmt, const char *color, ...)
+#ifdef NO_NEWT_SUPPORT
+void ui__warning(const char *format, ...)
 {
        va_list args;
-       int ret = 0;
 
-       if (dump_trace) {
-               va_start(args, color);
-               ret = color_vfprintf(stdout, color, fmt, args);
-               va_end(args);
-       }
-
-       return ret;
+       va_start(args, format);
+       vfprintf(stderr, format, args);
+       va_end(args);
 }
-
+#endif
 
 void trace_event(event_t *event)
 {
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
        if (!dump_trace)
                return;
 
-       dump_printf(".");
-       dump_printf_color("\n. ... raw event: size %d bytes\n", color,
-                         event->header.size);
+       printf(".");
+       color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+                     event->header.size);
 
        for (i = 0; i < event->header.size; i++) {
                if ((i & 15) == 0) {
-                       dump_printf(".");
-                       dump_printf_color("  %04x: ", color, i);
+                       printf(".");
+                       color_fprintf(stdout, color, "  %04x: ", i);
                }
 
-               dump_printf_color(" %02x", color, raw_event[i]);
+               color_fprintf(stdout, color, " %02x", raw_event[i]);
 
                if (((i & 15) == 15) || i == event->header.size-1) {
-                       dump_printf_color("  ", color);
+                       color_fprintf(stdout, color, "  ");
                        for (j = 0; j < 15-(i & 15); j++)
-                               dump_printf_color("   ", color);
+                               color_fprintf(stdout, color, "   ");
                        for (j = i & ~15; j <= i; j++) {
-                               dump_printf_color("%c", color,
-                                               isprint(raw_event[j]) ?
-                                               raw_event[j] : '.');
+                               color_fprintf(stdout, color, "%c",
+                                             isprint(raw_event[j]) ?
+                                             raw_event[j] : '.');
                        }
-                       dump_printf_color("\n", color);
+                       color_fprintf(stdout, color, "\n");
                }
        }
-       dump_printf(".\n");
+       printf(".\n");
 }
index 7b514082bbaff4992c31c590cd6d5b3153149f85..ca35fd66b5dfc8c238f5a4be3cb28ce402cf0bf6 100644 (file)
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
 #include "ui/progress.h"
 #endif
 
+void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
 #endif /* __PERF_DEBUG_H */
index dab9e754a28103b1727d6dee29669aaa2d7f89da..2302ec051bb4f1b5171bb543b33284b29bf6f8b6 100644 (file)
@@ -7,7 +7,7 @@
 #include "strlist.h"
 #include "thread.h"
 
-const char *event__name[] = {
+static const char *event__name[] = {
        [0]                      = "TOTAL",
        [PERF_RECORD_MMAP]       = "MMAP",
        [PERF_RECORD_LOST]       = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
        [PERF_RECORD_HEADER_EVENT_TYPE]  = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]        = "TRACING_DATA",
        [PERF_RECORD_HEADER_BUILD_ID]    = "BUILD_ID",
+       [PERF_RECORD_FINISHED_ROUND]     = "FINISHED_ROUND",
 };
 
-static pid_t event__synthesize_comm(pid_t pid, int full,
+const char *event__get_event_name(unsigned int id)
+{
+       if (id >= ARRAY_SIZE(event__name))
+               return "INVALID";
+       if (!event__name[id])
+               return "UNKNOWN";
+       return event__name[id];
+}
+
+static struct sample_data synth_sample = {
+       .pid       = -1,
+       .tid       = -1,
+       .time      = -1,
+       .stream_id = -1,
+       .cpu       = -1,
+       .period    = 1,
+};
+
+static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
                                    event__handler_t process,
                                    struct perf_session *session)
 {
-       event_t ev;
        char filename[PATH_MAX];
        char bf[BUFSIZ];
        FILE *fp;
@@ -49,34 +67,39 @@ out_race:
                return 0;
        }
 
-       memset(&ev.comm, 0, sizeof(ev.comm));
-       while (!ev.comm.comm[0] || !ev.comm.pid) {
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       goto out_failure;
+       memset(&event->comm, 0, sizeof(event->comm));
+
+       while (!event->comm.comm[0] || !event->comm.pid) {
+               if (fgets(bf, sizeof(bf), fp) == NULL) {
+                       pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
+                       goto out;
+               }
 
                if (memcmp(bf, "Name:", 5) == 0) {
                        char *name = bf + 5;
                        while (*name && isspace(*name))
                                ++name;
                        size = strlen(name) - 1;
-                       memcpy(ev.comm.comm, name, size++);
+                       memcpy(event->comm.comm, name, size++);
                } else if (memcmp(bf, "Tgid:", 5) == 0) {
                        char *tgids = bf + 5;
                        while (*tgids && isspace(*tgids))
                                ++tgids;
-                       tgid = ev.comm.pid = atoi(tgids);
+                       tgid = event->comm.pid = atoi(tgids);
                }
        }
 
-       ev.comm.header.type = PERF_RECORD_COMM;
+       event->comm.header.type = PERF_RECORD_COMM;
        size = ALIGN(size, sizeof(u64));
-       ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
-
+       memset(event->comm.comm + size, 0, session->id_hdr_size);
+       event->comm.header.size = (sizeof(event->comm) -
+                               (sizeof(event->comm.comm) - size) +
+                               session->id_hdr_size);
        if (!full) {
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
-               goto out_fclose;
+               process(event, &synth_sample, session);
+               goto out;
        }
 
        snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
                if (*end)
                        continue;
 
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
-       closedir(tasks);
 
-out_fclose:
+       closedir(tasks);
+out:
        fclose(fp);
-       return tgid;
 
-out_failure:
-       pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
-       return -1;
+       return tgid;
 }
 
-static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
                                         event__handler_t process,
                                         struct perf_session *session)
 {
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                return -1;
        }
 
+       event->header.type = PERF_RECORD_MMAP;
+       /*
+        * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+        */
+       event->header.misc = PERF_RECORD_MISC_USER;
+
        while (1) {
                char bf[BUFSIZ], *pbf = bf;
-               event_t ev = {
-                       .header = {
-                               .type = PERF_RECORD_MMAP,
-                               /*
-                                * Just like the kernel, see __perf_event_mmap
-                                * in kernel/perf_event.c
-                                */
-                               .misc = PERF_RECORD_MISC_USER,
-                        },
-               };
                int n;
                size_t size;
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = hex2u64(pbf, &ev.mmap.start);
+               n = hex2u64(pbf, &event->mmap.start);
                if (n < 0)
                        continue;
                pbf += n + 1;
-               n = hex2u64(pbf, &ev.mmap.len);
+               n = hex2u64(pbf, &event->mmap.len);
                if (n < 0)
                        continue;
                pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                                continue;
 
                        pbf += 3;
-                       n = hex2u64(pbf, &ev.mmap.pgoff);
+                       n = hex2u64(pbf, &event->mmap.pgoff);
 
                        size = strlen(execname);
                        execname[size - 1] = '\0'; /* Remove \n */
-                       memcpy(ev.mmap.filename, execname, size);
+                       memcpy(event->mmap.filename, execname, size);
                        size = ALIGN(size, sizeof(u64));
-                       ev.mmap.len -= ev.mmap.start;
-                       ev.mmap.header.size = (sizeof(ev.mmap) -
-                                              (sizeof(ev.mmap.filename) - size));
-                       ev.mmap.pid = tgid;
-                       ev.mmap.tid = pid;
-
-                       process(&ev, session);
+                       event->mmap.len -= event->mmap.start;
+                       event->mmap.header.size = (sizeof(event->mmap) -
+                                               (sizeof(event->mmap.filename) - size));
+                       memset(event->mmap.filename + size, 0, session->id_hdr_size);
+                       event->mmap.header.size += session->id_hdr_size;
+                       event->mmap.pid = tgid;
+                       event->mmap.tid = pid;
+
+                       process(event, &synth_sample, session);
                }
        }
 
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
 {
        struct rb_node *nd;
        struct map_groups *kmaps = &machine->kmaps;
-       u16 misc;
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP;
 
        /*
         * kernel uses 0 for user space maps, see kernel/perf_event.c
         * __perf_event_mmap
         */
        if (machine__is_host(machine))
-               misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
        else
-               misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
        for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
             nd; nd = rb_next(nd)) {
-               event_t ev;
                size_t size;
                struct map *pos = rb_entry(nd, struct map, rb_node);
 
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
                        continue;
 
                size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-               memset(&ev, 0, sizeof(ev));
-               ev.mmap.header.misc = misc;
-               ev.mmap.header.type = PERF_RECORD_MMAP;
-               ev.mmap.header.size = (sizeof(ev.mmap) -
-                                       (sizeof(ev.mmap.filename) - size));
-               ev.mmap.start = pos->start;
-               ev.mmap.len   = pos->end - pos->start;
-               ev.mmap.pid   = machine->pid;
-
-               memcpy(ev.mmap.filename, pos->dso->long_name,
+               event->mmap.header.type = PERF_RECORD_MMAP;
+               event->mmap.header.size = (sizeof(event->mmap) -
+                                       (sizeof(event->mmap.filename) - size));
+               memset(event->mmap.filename + size, 0, session->id_hdr_size);
+               event->mmap.header.size += session->id_hdr_size;
+               event->mmap.start = pos->start;
+               event->mmap.len   = pos->end - pos->start;
+               event->mmap.pid   = machine->pid;
+
+               memcpy(event->mmap.filename, pos->dso->long_name,
                       pos->dso->long_name_len + 1);
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
 
+       free(event);
        return 0;
 }
 
-int event__synthesize_thread(pid_t pid, event__handler_t process,
-                            struct perf_session *session)
+static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
+                                     pid_t pid, event__handler_t process,
+                                     struct perf_session *session)
 {
-       pid_t tgid = event__synthesize_comm(pid, 1, process, session);
+       pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+                                           session);
        if (tgid == -1)
                return -1;
-       return event__synthesize_mmap_events(pid, tgid, process, session);
+       return event__synthesize_mmap_events(mmap_event, pid, tgid,
+                                            process, session);
+}
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
+                            struct perf_session *session)
+{
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       err = __event__synthesize_thread(comm_event, mmap_event, pid,
+                                        process, session);
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session)
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session)
 {
        DIR *proc;
        struct dirent dirent, *next;
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
 
        proc = opendir("/proc");
+       if (proc == NULL)
+               goto out_free_mmap;
 
        while (!readdir_r(proc, &dirent, &next) && next) {
                char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
                if (*end) /* only interested in proper numerical dirents */
                        continue;
 
-               event__synthesize_thread(pid, process, session);
+               __event__synthesize_thread(comm_event, mmap_event, pid,
+                                          process, session);
        }
 
        closedir(proc);
+       err = 0;
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
 struct process_symbol_args {
@@ -260,7 +332,8 @@ struct process_symbol_args {
        u64        start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+static int find_symbol_cb(void *arg, const char *name, char type,
+                         u64 start, u64 end __used)
 {
        struct process_symbol_args *args = arg;
 
@@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
        char path[PATH_MAX];
        char name_buff[PATH_MAX];
        struct map *map;
-
-       event_t ev = {
-               .header = {
-                       .type = PERF_RECORD_MMAP,
-               },
-       };
+       int err;
        /*
         * We should get this from /sys/kernel/sections/.text, but till that is
         * available use this, and after it is use this as a fallback for older
         * kernels.
         */
        struct process_symbol_args args = { .name = symbol_name, };
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
 
        mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
        if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                 * kernel uses PERF_RECORD_MISC_USER for user space maps,
                 * see kernel/perf_event.c __perf_event_mmap
                 */
-               ev.header.misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
                filename = "/proc/kallsyms";
        } else {
-               ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
                if (machine__is_default_guest(machine))
                        filename = (char *) symbol_conf.default_guest_kallsyms;
                else {
@@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                return -ENOENT;
 
        map = machine->vmlinux_maps[MAP__FUNCTION];
-       size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
                        "%s%s", mmap_name, symbol_name) + 1;
        size = ALIGN(size, sizeof(u64));
-       ev.mmap.header.size = (sizeof(ev.mmap) -
-                       (sizeof(ev.mmap.filename) - size));
-       ev.mmap.pgoff = args.start;
-       ev.mmap.start = map->start;
-       ev.mmap.len   = map->end - ev.mmap.start;
-       ev.mmap.pid   = machine->pid;
-
-       return process(&ev, session);
+       event->mmap.header.type = PERF_RECORD_MMAP;
+       event->mmap.header.size = (sizeof(event->mmap) -
+                       (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+       event->mmap.pgoff = args.start;
+       event->mmap.start = map->start;
+       event->mmap.len   = map->end - event->mmap.start;
+       event->mmap.pid   = machine->pid;
+
+       err = process(event, &synth_sample, session);
+       free(event);
+
+       return err;
 }
 
 static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
        return 0;
 }
 
-int event__process_comm(event_t *self, struct perf_session *session)
+int event__process_comm(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->comm.tid);
 
@@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process_lost(event_t *self, struct perf_session *session)
+int event__process_lost(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
        session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
         * a zero sized synthesized MMAP event for the kernel.
         */
        if (maps[MAP__FUNCTION]->end == 0)
-               maps[MAP__FUNCTION]->end = ~0UL;
+               maps[MAP__FUNCTION]->end = ~0ULL;
 }
 
 static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@ out_problem:
        return -1;
 }
 
-int event__process_mmap(event_t *self, struct perf_session *session)
+int event__process_mmap(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct machine *machine;
        struct thread *thread;
@@ -526,7 +608,8 @@ out_problem:
        return 0;
 }
 
-int event__process_task(event_t *self, struct perf_session *session)
+int event__process_task(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
        struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process(event_t *event, struct perf_session *session)
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session)
 {
        switch (event->header.type) {
        case PERF_RECORD_COMM:
-               event__process_comm(event, session);
+               event__process_comm(event, sample, session);
                break;
        case PERF_RECORD_MMAP:
-               event__process_mmap(event, session);
+               event__process_mmap(event, sample, session);
                break;
        case PERF_RECORD_FORK:
        case PERF_RECORD_EXIT:
-               event__process_task(event, session);
+               event__process_task(event, sample, session);
                break;
        default:
                break;
@@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             symbol_filter_t filter)
 {
        u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-       struct thread *thread;
-
-       event__parse_sample(self, session->sample_type, data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
-                   self->header.misc, data->pid, data->tid, data->ip,
-                   data->period, data->cpu);
-
-       if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
-               unsigned int i;
-
-               dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
+       struct thread *thread = perf_session__findnew(session, self->ip.pid);
 
-               if (!ip_callchain__valid(data->callchain, self)) {
-                       pr_debug("call-chain problem with event, "
-                                "skipping it.\n");
-                       goto out_filtered;
-               }
-
-               if (dump_trace) {
-                       for (i = 0; i < data->callchain->nr; i++)
-                               dump_printf("..... %2d: %016Lx\n",
-                                           i, data->callchain->ips[i]);
-               }
-       }
-       thread = perf_session__findnew(session, self->ip.pid);
        if (thread == NULL)
                return -1;
 
@@ -766,9 +826,65 @@ out_filtered:
        return 0;
 }
 
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
+static int event__parse_id_sample(const event_t *event,
+                                 struct perf_session *session,
+                                 struct sample_data *sample)
 {
-       const u64 *array = event->sample.array;
+       const u64 *array;
+       u64 type;
+
+       sample->cpu = sample->pid = sample->tid = -1;
+       sample->stream_id = sample->id = sample->time = -1ULL;
+
+       if (!session->sample_id_all)
+               return 0;
+
+       array = event->sample.array;
+       array += ((event->header.size -
+                  sizeof(event->header)) / sizeof(u64)) - 1;
+       type = session->sample_type;
+
+       if (type & PERF_SAMPLE_CPU) {
+               u32 *p = (u32 *)array;
+               sample->cpu = *p;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_STREAM_ID) {
+               sample->stream_id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_ID) {
+               sample->id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TIME) {
+               sample->time = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TID) {
+               u32 *p = (u32 *)array;
+               sample->pid = p[0];
+               sample->tid = p[1];
+       }
+
+       return 0;
+}
+
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *data)
+{
+       const u64 *array;
+       u64 type;
+
+       if (event->header.type != PERF_RECORD_SAMPLE)
+               return event__parse_id_sample(event, session, data);
+
+       array = event->sample.array;
+       type = session->sample_type;
 
        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
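
event__parse_id_sample() above walks the event's trailing u64 array backward: with sample_id_all, the id fields are appended after the payload in a fixed order, so the last u64 is consumed first. A standalone sketch of that backward walk over a fabricated three-field trailer (TID pair, TIME, CPU; simplified types, not perf's full field set):

/* Sketch only: fabricated trailer { pid/tid } { time } { cpu/reserved },
 * parsed from the end the way event__parse_id_sample() does. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t trailer[3];
	uint32_t *p;

	p = (uint32_t *)&trailer[0];
	p[0] = 1234;			/* pid */
	p[1] = 1235;			/* tid */
	trailer[1] = 987654321ULL;	/* time */
	p = (uint32_t *)&trailer[2];
	p[0] = 3;			/* cpu */
	p[1] = 0;			/* reserved */

	/* start at the last u64 and decrement per parsed field */
	uint64_t *array = &trailer[2];

	p = (uint32_t *)array;
	printf("cpu %u\n", p[0]);
	array--;

	printf("time %llu\n", (unsigned long long)*array);
	array--;

	p = (uint32_t *)array;
	printf("pid %u tid %u\n", p[0], p[1]);
	return 0;
}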
index 8e790dae702625aa564594bee1dd52618b94f8d8..2b7e91902f105d5962a68254737ce21cfeef935c 100644 (file)
@@ -85,6 +85,7 @@ struct build_id_event {
 };
 
 enum perf_user_event_type { /* above any possible kernel type */
+       PERF_RECORD_USER_TYPE_START             = 64,
        PERF_RECORD_HEADER_ATTR                 = 64,
        PERF_RECORD_HEADER_EVENT_TYPE           = 65,
        PERF_RECORD_HEADER_TRACING_DATA         = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
 
 struct perf_session;
 
-typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+typedef int (*event__handler_synth_t)(event_t *event, 
+                                     struct perf_session *session);
+typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
+                               struct perf_session *session);
 
 int event__synthesize_thread(pid_t pid, event__handler_t process,
                             struct perf_session *session);
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session);
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session);
 int event__synthesize_kernel_mmap(event__handler_t process,
                                struct perf_session *session,
                                struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
                              struct perf_session *session,
                              struct machine *machine);
 
-int event__process_comm(event_t *self, struct perf_session *session);
-int event__process_lost(event_t *self, struct perf_session *session);
-int event__process_mmap(event_t *self, struct perf_session *session);
-int event__process_task(event_t *self, struct perf_session *session);
-int event__process(event_t *event, struct perf_session *session);
+int event__process_comm(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_lost(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_mmap(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_task(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session);
 
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             struct addr_location *al, struct sample_data *data,
                             symbol_filter_t filter);
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *sample);
 
-extern const char *event__name[];
+const char *event__get_event_name(unsigned int id);
 
 #endif /* __PERF_RECORD_H */
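
With the widened event__handler_t, a handler now receives the parsed sample_data alongside the raw event. A hypothetical handler under that signature (perf-tree build assumed; the function name is illustrative, and __used is perf's own unused-parameter annotation):

/* Sketch under the assumptions stated above. */
#include "event.h"
#include "session.h"
#include "debug.h"

static int my_process_comm(event_t *event, struct sample_data *sample,
			   struct perf_session *session __used)
{
	/* sample->time is filled from the sample_id_all trailer when present */
	pr_debug("COMM %s (pid %d) time %llu\n",
		 event->comm.comm, event->comm.pid,
		 (unsigned long long)sample->time);
	return 0;
}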
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644 (file)
index 0000000..c95267e
--- /dev/null
@@ -0,0 +1,186 @@
+#include "evsel.h"
+#include "../perf.h"
+#include "util.h"
+#include "cpumap.h"
+#include "thread.h"
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+{
+       struct perf_evsel *evsel = zalloc(sizeof(*evsel));
+
+       if (evsel != NULL) {
+               evsel->idx         = idx;
+               evsel->attr.type   = type;
+               evsel->attr.config = config;
+               INIT_LIST_HEAD(&evsel->node);
+       }
+
+       return evsel;
+}
+
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+       return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+       evsel->counts = zalloc((sizeof(*evsel->counts) +
+                               (ncpus * sizeof(struct perf_counts_values))));
+       return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_fd(struct perf_evsel *evsel)
+{
+       xyarray__delete(evsel->fd);
+       evsel->fd = NULL;
+}
+
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       int cpu, thread;
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               for (thread = 0; thread < nthreads; ++thread) {
+                       close(FD(evsel, cpu, thread));
+                       FD(evsel, cpu, thread) = -1;
+               }
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+       assert(list_empty(&evsel->node));
+       xyarray__delete(evsel->fd);
+       free(evsel);
+}
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale)
+{
+       struct perf_counts_values count;
+       size_t nv = scale ? 3 : 1;
+
+       if (FD(evsel, cpu, thread) < 0)
+               return -EINVAL;
+
+       if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
+               return -ENOMEM;
+
+       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
+               return -errno;
+
+       if (scale) {
+               if (count.run == 0)
+                       count.val = 0;
+               else if (count.run < count.ena)
+                       count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
+       } else
+               count.ena = count.run = 0;
+
+       evsel->counts->cpu[cpu] = count;
+       return 0;
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel,
+                      int ncpus, int nthreads, bool scale)
+{
+       size_t nv = scale ? 3 : 1;
+       int cpu, thread;
+       struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+
+       aggr->val = 0;
+
+       for (cpu = 0; cpu < ncpus; cpu++) {
+               for (thread = 0; thread < nthreads; thread++) {
+                       if (FD(evsel, cpu, thread) < 0)
+                               continue;
+
+                       if (readn(FD(evsel, cpu, thread),
+                                 &count, nv * sizeof(u64)) < 0)
+                               return -errno;
+
+                       aggr->val += count.val;
+                       if (scale) {
+                               aggr->ena += count.ena;
+                               aggr->run += count.run;
+                       }
+               }
+       }
+
+       evsel->counts->scaled = 0;
+       if (scale) {
+               if (aggr->run == 0) {
+                       evsel->counts->scaled = -1;
+                       aggr->val = 0;
+                       return 0;
+               }
+
+               if (aggr->run < aggr->ena) {
+                       evsel->counts->scaled = 1;
+                       aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+               }
+       } else
+               aggr->ena = aggr->run = 0;
+
+       return 0;
+}
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+{
+       int cpu;
+
+       if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+               return -1;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
+                                                       cpus->map[cpu], -1, 0);
+               if (FD(evsel, cpu, 0) < 0)
+                       goto out_close;
+       }
+
+       return 0;
+
+out_close:
+       while (--cpu >= 0) {
+               close(FD(evsel, cpu, 0));
+               FD(evsel, cpu, 0) = -1;
+       }
+       return -1;
+}
+
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+       int thread;
+
+       if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr) < 0)
+               return -1;
+
+       for (thread = 0; thread < threads->nr; thread++) {
+               FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
+                                                          threads->map[thread], -1, -1, 0);
+               if (FD(evsel, 0, thread) < 0)
+                       goto out_close;
+       }
+
+       return 0;
+
+out_close:
+       while (--thread >= 0) {
+               close(FD(evsel, 0, thread));
+               FD(evsel, 0, thread) = -1;
+       }
+       return -1;
+}
+
+int perf_evsel__open(struct perf_evsel *evsel,
+                    struct cpu_map *cpus, struct thread_map *threads)
+{
+       if (threads == NULL)
+               return perf_evsel__open_per_cpu(evsel, cpus);
+
+       return perf_evsel__open_per_thread(evsel, threads);
+}
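
The file above is the complete lifecycle of the new abstraction: perf_evsel__new() allocates and initializes, one of the open helpers creates the kernel counters, the read helpers pull values into evsel->counts, and the close/delete helpers tear everything down. A minimal caller sketch, illustrative only — it assumes cpu_map__new() from cpumap.h (not part of this diff) and casts perf's u64 for printing with PRIu64:

	#include <inttypes.h>
	#include <stdio.h>
	#include "evsel.h"
	#include "cpumap.h"

	static int count_cycles_on_all_cpus(void)
	{
		struct cpu_map *cpus = cpu_map__new(NULL); /* assumed: NULL = all online CPUs */
		struct perf_evsel *evsel;
		int cpu, err = -1;

		if (cpus == NULL)
			return -1;

		evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
					PERF_COUNT_HW_CPU_CYCLES, 0);
		if (evsel == NULL)
			return -1;

		if (perf_evsel__open_per_cpu(evsel, cpus) < 0)
			goto out_delete;

		/* ... run the workload of interest ... */

		for (cpu = 0; cpu < cpus->nr; cpu++) {
			if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0)
				goto out_close;
			printf("CPU %d: %" PRIu64 " cycles\n", cpus->map[cpu],
			       (uint64_t)evsel->counts->cpu[cpu].val);
		}
		err = 0;
	out_close:
		perf_evsel__close_fd(evsel, cpus->nr, 1);
	out_delete:
		perf_evsel__delete(evsel);
		return err;
	}
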
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644 (file)
index 0000000..a0ccd69
--- /dev/null
@@ -0,0 +1,115 @@
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include "../../../include/linux/perf_event.h"
+#include "types.h"
+#include "xyarray.h"
+struct perf_counts_values {
+       union {
+               struct {
+                       u64 val;
+                       u64 ena;
+                       u64 run;
+               };
+               u64 values[3];
+       };
+};
+
+struct perf_counts {
+       s8                        scaled;
+       struct perf_counts_values aggr;
+       struct perf_counts_values cpu[];
+};
+
+struct perf_evsel {
+       struct list_head        node;
+       struct perf_event_attr  attr;
+       char                    *filter;
+       struct xyarray          *fd;
+       struct perf_counts      *counts;
+       int                     idx;
+       void                    *priv;
+};
+
+struct cpu_map;
+struct thread_map;
+
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+void perf_evsel__delete(struct perf_evsel *evsel);
+
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
+int perf_evsel__open(struct perf_evsel *evsel,
+                    struct cpu_map *cpus, struct thread_map *threads);
+
+#define perf_evsel__match(evsel, t, c)         \
+       (evsel->attr.type == PERF_TYPE_##t &&   \
+        evsel->attr.config == PERF_COUNT_##c)
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                                         int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+                                                int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+                      bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+                                   int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+                                         int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
+#endif /* __PERF_EVSEL_H */
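
One note on the scale parameter threaded through the read helpers above: the correction applied in evsel.c is the standard perf multiplexing extrapolation, value * time_enabled / time_running, rounded to the nearest integer. For example, a counter that accumulated val = 1000 while actually scheduled for run = 2ms out of ena = 4ms enabled is reported as 1000 * 4 / 2 = 2000 with counts->scaled set to 1; if run == 0 the value is reported as 0 with scaled = -1; and if the counter ran the whole time the raw value is kept with scaled = 0.
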
index 64a85bafde63a1c6f05451a32b31eec521c799d9..989fa2dee2fd2ae441d98921b9e69a56c4fe50e2 100644 (file)
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
        set_bit(feat, self->adds_features);
 }
 
+void perf_header__clear_feat(struct perf_header *self, int feat)
+{
+       clear_bit(feat, self->adds_features);
+}
+
 bool perf_header__has_feat(const struct perf_header *self, int feat)
 {
        return test_bit(feat, self->adds_features);
@@ -265,15 +270,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                          const char *name, bool is_kallsyms)
 {
        const size_t size = PATH_MAX;
-       char *filename = malloc(size),
+       char *realname = realpath(name, NULL),
+            *filename = malloc(size),
             *linkname = malloc(size), *targetname;
        int len, err = -1;
 
-       if (filename == NULL || linkname == NULL)
+       if (realname == NULL || filename == NULL || linkname == NULL)
                goto out_free;
 
        len = snprintf(filename, size, "%s%s%s",
-                      debugdir, is_kallsyms ? "/" : "", name);
+                      debugdir, is_kallsyms ? "/" : "", realname);
        if (mkdir_p(filename, 0755))
                goto out_free;
 
@@ -283,7 +289,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                if (is_kallsyms) {
                         if (copyfile("/proc/kallsyms", filename))
                                goto out_free;
-               } else if (link(name, filename) && copyfile(name, filename))
+               } else if (link(realname, filename) && copyfile(name, filename))
                        goto out_free;
        }
 
@@ -300,6 +306,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
        if (symlink(targetname, linkname) == 0)
                err = 0;
 out_free:
+       free(realname);
        free(filename);
        free(linkname);
        return err;
@@ -431,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
        int idx = 0, err;
 
        session = container_of(self, struct perf_session, header);
-       if (perf_session__read_build_ids(session, true))
-               perf_header__set_feat(self, HEADER_BUILD_ID);
+
+       if (perf_header__has_feat(self, HEADER_BUILD_ID) &&
+           !perf_session__read_build_ids(session, true))
+               perf_header__clear_feat(self, HEADER_BUILD_ID);
 
        nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
        if (!nr_sections)
@@ -454,7 +463,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 
                /* Write trace info */
                trace_sec->offset = lseek(fd, 0, SEEK_CUR);
-               read_tracing_data(fd, attrs, nr_counters);
+               read_tracing_data(fd, &evsel_list);
                trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
        }
 
@@ -597,7 +606,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 static int perf_header__getbuffer64(struct perf_header *self,
                                    int fd, void *buf, size_t size)
 {
-       if (do_read(fd, buf, size) <= 0)
+       if (readn(fd, buf, size) <= 0)
                return -1;
 
        if (self->needs_swap)
@@ -653,7 +662,7 @@ int perf_file_header__read(struct perf_file_header *self,
 {
        lseek(fd, 0, SEEK_SET);
 
-       if (do_read(fd, self, sizeof(*self)) <= 0 ||
+       if (readn(fd, self, sizeof(*self)) <= 0 ||
            memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
                return -1;
 
@@ -814,7 +823,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
                                       struct perf_header *ph, int fd,
                                       bool repipe)
 {
-       if (do_read(fd, self, sizeof(*self)) <= 0 ||
+       if (readn(fd, self, sizeof(*self)) <= 0 ||
            memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
                return -1;
 
@@ -939,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header)
        return type;
 }
 
+bool perf_header__sample_id_all(const struct perf_header *header)
+{
+       bool value = false, first = true;
+       int i;
+
+       for (i = 0; i < header->attrs; i++) {
+               struct perf_header_attr *attr = header->attr[i];
+
+               if (first) {
+                       value = attr->attr.sample_id_all;
+                       first = false;
+               } else if (value != attr->attr.sample_id_all)
+                       die("non matching sample_id_all");
+       }
+
+       return value;
+}
+
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header)
 {
@@ -985,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
 
        ev = malloc(size);
 
+       if (ev == NULL)
+               return -ENOMEM;
+
        ev->attr.attr = *attr;
        memcpy(ev->attr.id, id, ids * sizeof(u64));
 
        ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
        ev->attr.header.size = size;
 
-       err = process(ev, session);
+       err = process(ev, NULL, session);
 
        free(ev);
 
        return err;
 }
 
-int event__synthesize_attrs(struct perf_header *self,
-                           event__handler_t process,
+int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
                            struct perf_session *session)
 {
        struct perf_header_attr *attr;
@@ -1069,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
        ev.event_type.header.size = sizeof(ev.event_type) -
                (sizeof(ev.event_type.event_type.name) - size);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
@@ -1104,8 +1133,7 @@ int event__process_event_type(event_t *self,
        return 0;
 }
 
-int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
-                                  int nb_events,
+int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
                                   event__handler_t process,
                                   struct perf_session *session __unused)
 {
@@ -1116,7 +1144,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
        memset(&ev, 0, sizeof(ev));
 
        ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
-       size = read_tracing_data_size(fd, pattrs, nb_events);
+       size = read_tracing_data_size(fd, pattrs);
        if (size <= 0)
                return size;
        aligned_size = ALIGN(size, sizeof(u64));
@@ -1124,9 +1152,9 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
        ev.tracing_data.header.size = sizeof(ev.tracing_data);
        ev.tracing_data.size = aligned_size;
 
-       process(&ev, session);
+       process(&ev, NULL, session);
 
-       err = read_tracing_data(fd, pattrs, nb_events);
+       err = read_tracing_data(fd, pattrs);
        write_padded(fd, NULL, 0, padding);
 
        return aligned_size;
@@ -1184,7 +1212,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
        ev.build_id.header.size = sizeof(ev.build_id) + len;
        memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
index 402ac2454cf8bcc664c3daf193119f470c77d7c3..33f16be7b72fdad5a3757acbfbceb52b3966c593 100644 (file)
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
 int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
+bool perf_header__sample_id_all(const struct perf_header *header);
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
 void perf_header__set_feat(struct perf_header *self, int feat);
+void perf_header__clear_feat(struct perf_header *self, int feat);
 bool perf_header__has_feat(const struct perf_header *self, int feat);
 
 int perf_header__process_sections(struct perf_header *self, int fd,
@@ -111,8 +113,7 @@ int event__synthesize_event_types(event__handler_t process,
 int event__process_event_type(event_t *self,
                              struct perf_session *session);
 
-int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
-                                  int nb_events,
+int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
                                   event__handler_t process,
                                   struct perf_session *session);
 int event__process_tracing_data(event_t *self,
index 2022e87409942ca4b0d133c3f889e41178a663d1..c749ba6136a0ac33d7cdae8bf5b6604f1ee524af 100644 (file)
@@ -356,7 +356,7 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
 
 static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
                                     int depth, int depth_mask, int period,
-                                    u64 total_samples, int hits,
+                                    u64 total_samples, u64 hits,
                                     int left_margin)
 {
        int i;
@@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
        FILE *file;
        int err = 0;
        u64 len;
+       char symfs_filename[PATH_MAX];
+
+       if (filename) {
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
+       }
 
        if (filename == NULL) {
                if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
                        return -ENOMEM;
                }
                goto fallback;
-       } else if (readlink(filename, command, sizeof(command)) < 0 ||
+       } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
                   strstr(command, "[kernel.kallsyms]") ||
-                  access(filename, R_OK)) {
+                  access(symfs_filename, R_OK)) {
                free(filename);
 fallback:
                /*
@@ -1111,6 +1117,8 @@ fallback:
                 * DSO is the same as when 'perf record' ran.
                 */
                filename = dso->long_name;
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
                free_filename = false;
        }
 
@@ -1137,7 +1145,7 @@ fallback:
                 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
                 map__rip_2objdump(map, sym->start),
                 map__rip_2objdump(map, sym->end),
-                filename, filename);
+                symfs_filename, filename);
 
        pr_debug("Executing: %s\n", command);
 
@@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
        size_t ret = 0;
 
        for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
-               if (!event__name[i])
+               const char *name = event__get_event_name(i);
+
+               if (!strcmp(name, "UNKNOWN"))
                        continue;
-               ret += fprintf(fp, "%10s events: %10d\n",
-                              event__name[i], self->stats.nr_events[i]);
+
+               ret += fprintf(fp, "%16s events: %10d\n", name,
+                              self->stats.nr_events[i]);
        }
 
        return ret;
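
The symfs_filename changes above route every filesystem access made during annotation (readlink(), access(), and the objdump invocation) through a symbol_conf.symfs prefix, while the unprefixed name is still what gets displayed and grepped for. For example (hypothetical paths), with symfs set to /mnt/sysroot and a DSO long_name of /usr/lib/libfoo.so, objdump disassembles /mnt/sysroot/usr/lib/libfoo.so, but the grep -v filter still matches the original /usr/lib/libfoo.so string in objdump's output.
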
index 587d375d34300daa09948c1a0d95407b4f8439fa..ee789856a8c94644e189f0dc8a7be7933469a6cb 100644 (file)
@@ -52,8 +52,10 @@ struct sym_priv {
 struct events_stats {
        u64 total_period;
        u64 total_lost;
+       u64 total_invalid_chains;
        u32 nr_events[PERF_RECORD_HEADER_MAX];
        u32 nr_unknown_events;
+       u32 nr_invalid_chains;
 };
 
 enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644 (file)
index 0000000..acffd5e
--- /dev/null
@@ -0,0 +1,9 @@
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644 (file)
index 0000000..bb4198e
--- /dev/null
@@ -0,0 +1,11 @@
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+
+#endif /* PERF_DWARF2_H */
+
index bb4ac2e053859482f98933b278a8d0adda71aa5a..8be0b968ca0bcfa44c95248e14e3878481887b21 100644 (file)
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
 }
 
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+       addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
 static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
 {
        return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644 (file)
index 0000000..06387cf
--- /dev/null
@@ -0,0 +1,13 @@
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name)                            \
+       .globl name;                            \
+       name:
+
+#define ENDPROC(name)
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
index 4af5bd59cfd14b475d0f2fa60e15f1b4b4e908de..649083f27e08bfbfa952b10bb68fc8639f4d1195 100644 (file)
@@ -1,6 +1,7 @@
 #include "../../../include/linux/hw_breakpoint.h"
 #include "util.h"
 #include "../perf.h"
+#include "evsel.h"
 #include "parse-options.h"
 #include "parse-events.h"
 #include "exec_cmd.h"
@@ -12,8 +13,7 @@
 
 int                            nr_counters;
 
-struct perf_event_attr         attrs[MAX_COUNTERS];
-char                           *filters[MAX_COUNTERS];
+LIST_HEAD(evsel_list);
 
 struct event_symbol {
        u8              type;
@@ -266,10 +266,10 @@ static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
        return name;
 }
 
-const char *event_name(int counter)
+const char *event_name(struct perf_evsel *evsel)
 {
-       u64 config = attrs[counter].config;
-       int type = attrs[counter].type;
+       u64 config = evsel->attr.config;
+       int type = evsel->attr.type;
 
        return __event_name(type, config);
 }
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
        id = atoll(id_buf);
        attr->config = id;
        attr->type = PERF_TYPE_TRACEPOINT;
-       *strp = evt_name + evt_length;
+       *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
 
        attr->sample_type |= PERF_SAMPLE_RAW;
        attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
                                    struct perf_event_attr *attr)
 {
        const char *evt_name;
-       char *flags;
+       char *flags = NULL, *comma_loc;
        char sys_name[MAX_EVENT_LENGTH];
        unsigned int sys_length, evt_length;
 
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
        sys_name[sys_length] = '\0';
        evt_name = evt_name + 1;
 
+       comma_loc = strchr(evt_name, ',');
+       if (comma_loc) {
+               /* take the event name up to the comma */
+               evt_name = strndup(evt_name, comma_loc - evt_name);
+       }
        flags = strchr(evt_name, ':');
        if (flags) {
                /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
        evt_length = strlen(evt_name);
        if (evt_length >= MAX_EVENT_LENGTH)
                return EVT_FAILED;
-
        if (strpbrk(evt_name, "*?")) {
-               *strp = evt_name + evt_length;
+               *strp += strlen(sys_name) + evt_length;
                return parse_multiple_tracepoint_event(sys_name, evt_name,
                                                       flags);
        } else
@@ -810,9 +814,6 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
                        return -1;
 
        for (;;) {
-               if (nr_counters == MAX_COUNTERS)
-                       return -1;
-
                memset(&attr, 0, sizeof(attr));
                ret = parse_event_symbols(&str, &attr);
                if (ret == EVT_FAILED)
@@ -822,8 +823,13 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
                        return -1;
 
                if (ret != EVT_HANDLED_ALL) {
-                       attrs[nr_counters] = attr;
-                       nr_counters++;
+                       struct perf_evsel *evsel;
+                       evsel = perf_evsel__new(attr.type, attr.config,
+                                               nr_counters);
+                       if (evsel == NULL)
+                               return -1;
+                       list_add_tail(&evsel->node, &evsel_list);
+                       ++nr_counters;
                }
 
                if (*str == 0)
@@ -840,21 +846,22 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 int parse_filter(const struct option *opt __used, const char *str,
                 int unset __used)
 {
-       int i = nr_counters - 1;
-       int len = strlen(str);
+       struct perf_evsel *last = NULL;
 
-       if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) {
+       if (!list_empty(&evsel_list))
+               last = list_entry(evsel_list.prev, struct perf_evsel, node);
+
+       if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
                fprintf(stderr,
                        "-F option should follow a -e tracepoint option\n");
                return -1;
        }
 
-       filters[i] = malloc(len + 1);
-       if (!filters[i]) {
+       last->filter = strdup(str);
+       if (last->filter == NULL) {
                fprintf(stderr, "not enough memory to hold filter string\n");
                return -1;
        }
-       strcpy(filters[i], str);
 
        return 0;
 }
@@ -905,6 +912,47 @@ static void print_tracepoint_events(void)
        closedir(sys_dir);
 }
 
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ */
+
+int is_valid_tracepoint(const char *event_string)
+{
+       DIR *sys_dir, *evt_dir;
+       struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
+       char evt_path[MAXPATHLEN];
+       char dir_path[MAXPATHLEN];
+
+       if (debugfs_valid_mountpoint(debugfs_path))
+               return 0;
+
+       sys_dir = opendir(debugfs_path);
+       if (!sys_dir)
+               return 0;
+
+       for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+
+               snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
+                        sys_dirent.d_name);
+               evt_dir = opendir(dir_path);
+               if (!evt_dir)
+                       continue;
+
+               for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+                       snprintf(evt_path, MAXPATHLEN, "%s:%s",
+                                sys_dirent.d_name, evt_dirent.d_name);
+                       if (!strcmp(evt_path, event_string)) {
+                               closedir(evt_dir);
+                               closedir(sys_dir);
+                               return 1;
+                       }
+               }
+               closedir(evt_dir);
+       }
+       closedir(sys_dir);
+       return 0;
+}
+
 /*
  * Print the help text for the event symbols:
  */
@@ -963,3 +1011,26 @@ void print_events(void)
 
        exit(129);
 }
+
+int perf_evsel_list__create_default(void)
+{
+       struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
+                                                  PERF_COUNT_HW_CPU_CYCLES, 0);
+       if (evsel == NULL)
+               return -ENOMEM;
+
+       list_add(&evsel->node, &evsel_list);
+       ++nr_counters;
+       return 0;
+}
+
+void perf_evsel_list__delete(void)
+{
+       struct perf_evsel *pos, *n;
+
+       list_for_each_entry_safe(pos, n, &evsel_list, node) {
+               list_del_init(&pos->node);
+               perf_evsel__delete(pos);
+       }
+       nr_counters = 0;
+}
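
With the fixed-size attrs[]/filters[] arrays gone, everything now walks the new evsel_list. A sketch of the new idiom, illustrative only (the declarations all come from the headers added or changed in this series):

	#include <inttypes.h>
	#include <stdio.h>
	#include "evsel.h"
	#include "parse-events.h"

	static void list_parsed_events(void)
	{
		struct perf_evsel *pos;

		list_for_each_entry(pos, &evsel_list, node)
			printf("%s: type %u, config %" PRIu64 "%s\n",
			       event_name(pos), pos->attr.type,
			       (uint64_t)pos->attr.config,
			       pos->filter ? " (has filter)" : "");
	}
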
index fc4ab3fe877a22d191450a6bc4c75118cf71901d..b82cafb8377202462d16ef7ed36b8e546201c76b 100644 (file)
@@ -4,6 +4,16 @@
  * Parse symbolic events/counts passed in as options:
  */
 
+#include "../../../include/linux/perf_event.h"
+
+struct list_head;
+struct perf_evsel;
+
+extern struct list_head evsel_list;
+
+int perf_evsel_list__create_default(void);
+void perf_evsel_list__delete(void);
+
 struct option;
 
 struct tracepoint_path {
@@ -13,14 +23,11 @@ struct tracepoint_path {
 };
 
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events);
+extern bool have_tracepoints(struct list_head *evsel_list);
 
 extern int                     nr_counters;
 
-extern struct perf_event_attr attrs[MAX_COUNTERS];
-extern char *filters[MAX_COUNTERS];
-
-extern const char *event_name(int ctr);
+const char *event_name(struct perf_evsel *event);
 extern const char *__event_name(int type, u64 config);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
@@ -29,9 +36,9 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
 #define EVENTS_HELP_MAX (128*1024)
 
 extern void print_events(void);
+extern int is_valid_tracepoint(const char *event_string);
 
 extern char debugfs_path[];
 extern int valid_debugfs_mount(const char *debugfs);
 
-
 #endif /* __PERF_PARSE_EVENTS_H */
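
Builtins that previously defaulted nr_counters to a cycles event are expected to go through the two list helpers declared above. The pattern, sketched (illustrative only):

	if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0)
		return -1;	/* seeds one PERF_COUNT_HW_CPU_CYCLES evsel */
	/* ... record/stat/top work on evsel_list here ... */
	perf_evsel_list__delete();
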
index c7d72dce54b2cf7c3f46042bd6ce6a68c941b4d6..abc31a1dac1a738c5791512032c5a9337af84b9a 100644 (file)
@@ -119,6 +119,10 @@ struct option {
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
 #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+       .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+       .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
 
 /* parse_options() will filter out the processed options and leave the
  * non-option arguments in argv[].
index 3b6a5297bf16cd5a318273bc0a9bf198734a0cfe..128aaab0aedad86403a0c722211e4d57cb982d8d 100644 (file)
@@ -95,7 +95,7 @@ static int init_vmlinux(void)
                goto out;
 
        if (machine__create_kernel_maps(&machine) < 0) {
-               pr_debug("machine__create_kernel_maps ");
+               pr_debug("machine__create_kernel_maps() failed.\n");
                goto out;
        }
 out:
@@ -114,6 +114,8 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
 const char *kernel_get_module_path(const char *module)
 {
        struct dso *dso;
+       struct map *map;
+       const char *vmlinux_name;
 
        if (module) {
                list_for_each_entry(dso, &machine.kernel_dsos, node) {
@@ -123,10 +125,17 @@ const char *kernel_get_module_path(const char *module)
                }
                pr_debug("Failed to find module %s.\n", module);
                return NULL;
+       }
+
+       map = machine.vmlinux_maps[MAP__FUNCTION];
+       dso = map->dso;
+
+       vmlinux_name = symbol_conf.vmlinux_name;
+       if (vmlinux_name) {
+               if (dso__load_vmlinux(dso, map, vmlinux_name, NULL) <= 0)
+                       return NULL;
        } else {
-               dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
-               if (dso__load_vmlinux_path(dso,
-                        machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
+               if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
                        pr_debug("Failed to load kernel map.\n");
                        return NULL;
                }
@@ -140,7 +149,8 @@ static int open_vmlinux(const char *module)
 {
        const char *path = kernel_get_module_path(module);
        if (!path) {
-               pr_err("Failed to find path of %s module", module ?: "kernel");
+               pr_err("Failed to find path of %s module.\n",
+                      module ?: "kernel");
                return -ENOENT;
        }
        pr_debug("Try to open %s\n", path);
@@ -217,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                pr_warning("Warning: No dwarf info found in the vmlinux - "
                        "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
                if (!need_dwarf) {
-                       pr_debug("Trying to use symbols.\nn");
+                       pr_debug("Trying to use symbols.\n");
                        return 0;
                }
        }
@@ -286,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir,
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
-static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
 {
        char buf[LINEBUF_SIZE];
-       const char *color = PERF_COLOR_BLUE;
-
-       if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
-               goto error;
-       if (!skip) {
-               if (show_num)
-                       fprintf(stdout, "%7d  %s", l, buf);
-               else
-                       color_fprintf(stdout, color, "         %s", buf);
-       }
+       const char *color = show_num ? "" : PERF_COLOR_BLUE;
+       const char *prefix = NULL;
 
-       while (strlen(buf) == LINEBUF_SIZE - 1 &&
-              buf[LINEBUF_SIZE - 2] != '\n') {
+       do {
                if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
                        goto error;
-               if (!skip) {
-                       if (show_num)
-                               fprintf(stdout, "%s", buf);
-                       else
-                               color_fprintf(stdout, color, "%s", buf);
+               if (skip)
+                       continue;
+               if (!prefix) {
+                       prefix = show_num ? "%7d  " : "         ";
+                       color_fprintf(stdout, color, prefix, l);
                }
-       }
+               color_fprintf(stdout, color, "%s", buf);
 
-       return 0;
+       } while (strchr(buf, '\n') == NULL);
+
+       return 1;
 error:
-       if (feof(fp))
-               pr_warning("Source file is shorter than expected.\n");
-       else
+       if (ferror(fp)) {
                pr_warning("File read error: %s\n", strerror(errno));
+               return -1;
+       }
+       return 0;
+}
 
-       return -1;
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+       int rv = __show_one_line(fp, l, skip, show_num);
+       if (rv == 0) {
+               pr_warning("Source file is shorter than expected.\n");
+               rv = -1;
+       }
+       return rv;
 }
 
+#define show_one_line_with_num(f,l)    _show_one_line(f,l,false,true)
+#define show_one_line(f,l)             _show_one_line(f,l,false,false)
+#define skip_one_line(f,l)             _show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)      __show_one_line(f,l,false,false)
+
 /*
  * Show line-range always requires debuginfo to find source file and
  * line number.
@@ -370,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module)
                fprintf(stdout, "<%s:%d>\n", lr->function,
                        lr->start - lr->offset);
        else
-               fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+               fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
 
        fp = fopen(lr->path, "r");
        if (fp == NULL) {
@@ -379,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module)
                return -errno;
        }
        /* Skip to starting line number */
-       while (l < lr->start && ret >= 0)
-               ret = show_one_line(fp, l++, true, false);
-       if (ret < 0)
-               goto end;
+       while (l < lr->start) {
+               ret = skip_one_line(fp, l++);
+               if (ret < 0)
+                       goto end;
+       }
 
        list_for_each_entry(ln, &lr->line_list, list) {
-               while (ln->line > l && ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, false);
-               if (ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, true);
+               for (; ln->line > l; l++) {
+                       ret = show_one_line(fp, l - lr->offset);
+                       if (ret < 0)
+                               goto end;
+               }
+               ret = show_one_line_with_num(fp, l++ - lr->offset);
                if (ret < 0)
                        goto end;
        }
 
        if (lr->end == INT_MAX)
                lr->end = l + NR_ADDITIONAL_LINES;
-       while (l <= lr->end && !feof(fp) && ret >= 0)
-               ret = show_one_line(fp, (l++) - lr->offset, false, false);
+       while (l <= lr->end) {
+               ret = show_one_line_or_eof(fp, l++ - lr->offset);
+               if (ret <= 0)
+                       break;
+       }
 end:
        fclose(fp);
        return ret;
@@ -457,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 
        fd = open_vmlinux(module);
        if (fd < 0) {
-               pr_warning("Failed to open debuginfo file.\n");
+               pr_warning("Failed to open debug information file.\n");
                return fd;
        }
 
@@ -517,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused,
 }
 #endif
 
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+       const char *start = *ptr;
+
+       errno = 0;
+       *val = strtol(*ptr, ptr, 0);
+       if (errno || *ptr == start) {
+               semantic_error("'%s' is not a valid number.\n", what);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[:SLN[+NUM|-ELN]]
+ */
 int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
-       const char *ptr;
-       char *tmp;
-       /*
-        * <Syntax>
-        * SRC:SLN[+NUM|-ELN]
-        * FUNC[:SLN[+NUM|-ELN]]
-        */
-       ptr = strchr(arg, ':');
-       if (ptr) {
-               lr->start = (int)strtoul(ptr + 1, &tmp, 0);
-               if (*tmp == '+') {
-                       lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0);
-                       lr->end--;      /*
-                                        * Adjust the number of lines here.
-                                        * If the number of lines == 1, the
-                                        * the end of line should be equal to
-                                        * the start of line.
-                                        */
-               } else if (*tmp == '-')
-                       lr->end = (int)strtoul(tmp + 1, &tmp, 0);
-               else
-                       lr->end = INT_MAX;
+       char *range, *name = strdup(arg);
+       int err;
+
+       if (!name)
+               return -ENOMEM;
+
+       lr->start = 0;
+       lr->end = INT_MAX;
+
+       range = strchr(name, ':');
+       if (range) {
+               *range++ = '\0';
+
+               err = parse_line_num(&range, &lr->start, "start line");
+               if (err)
+                       goto err;
+
+               if (*range == '+' || *range == '-') {
+                       const char c = *range++;
+
+                       err = parse_line_num(&range, &lr->end, "end line");
+                       if (err)
+                               goto err;
+
+                       if (c == '+') {
+                               lr->end += lr->start;
+                               /*
+                                * Adjust the number of lines here.
+                                * If the number of lines == 1, the
+                                * end of line should be equal to
+                                * the start of line.
+                                */
+                               lr->end--;
+                       }
+               }
+
                pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+               err = -EINVAL;
                if (lr->start > lr->end) {
                        semantic_error("Start line must be smaller"
                                       " than end line.\n");
-                       return -EINVAL;
+                       goto err;
                }
-               if (*tmp != '\0') {
-                       semantic_error("Tailing with invalid character '%d'.\n",
-                                      *tmp);
-                       return -EINVAL;
+               if (*range != '\0') {
+                       semantic_error("Tailing with invalid str '%s'.\n", range);
+                       goto err;
                }
-               tmp = strndup(arg, (ptr - arg));
-       } else {
-               tmp = strdup(arg);
-               lr->end = INT_MAX;
        }
 
-       if (tmp == NULL)
-               return -ENOMEM;
-
-       if (strchr(tmp, '.'))
-               lr->file = tmp;
+       if (strchr(name, '.'))
+               lr->file = name;
        else
-               lr->function = tmp;
+               lr->function = name;
 
        return 0;
+err:
+       free(name);
+       return err;
 }
 
 /* Check the name is good for event/group */
@@ -690,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 
        /* Exclusion check */
        if (pp->lazy_line && pp->line) {
-               semantic_error("Lazy pattern can't be used with line number.");
+               semantic_error("Lazy pattern can't be used with"
+                              " line number.\n");
                return -EINVAL;
        }
 
        if (pp->lazy_line && pp->offset) {
-               semantic_error("Lazy pattern can't be used with offset.");
+               semantic_error("Lazy pattern can't be used with offset.\n");
                return -EINVAL;
        }
 
        if (pp->line && pp->offset) {
-               semantic_error("Offset can't be used with line number.");
+               semantic_error("Offset can't be used with line number.\n");
                return -EINVAL;
        }
 
        if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
                semantic_error("File always requires line number or "
-                              "lazy pattern.");
+                              "lazy pattern.\n");
                return -EINVAL;
        }
 
        if (pp->offset && !pp->function) {
-               semantic_error("Offset requires an entry function.");
+               semantic_error("Offset requires an entry function.\n");
                return -EINVAL;
        }
 
        if (pp->retprobe && !pp->function) {
-               semantic_error("Return probe requires an entry function.");
+               semantic_error("Return probe requires an entry function.\n");
                return -EINVAL;
        }
 
        if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
                semantic_error("Offset/Line/Lazy pattern can't be used with "
-                              "return probe.");
+                              "return probe.\n");
                return -EINVAL;
        }
 
@@ -996,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
 
        return tmp - buf;
 error:
-       pr_debug("Failed to synthesize perf probe argument: %s",
+       pr_debug("Failed to synthesize perf probe argument: %s\n",
                 strerror(-ret));
        return ret;
 }
@@ -1024,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
                        goto error;
        }
        if (pp->file) {
-               len = strlen(pp->file) - 31;
-               if (len < 0)
-                       len = 0;
-               tmp = strchr(pp->file + len, '/');
-               if (!tmp)
-                       tmp = pp->file + len;
-               ret = e_snprintf(file, 32, "@%s", tmp + 1);
+               tmp = pp->file;
+               len = strlen(tmp);
+               if (len > 30) {
+                       tmp = strchr(pp->file + len - 30, '/');
+                       tmp = tmp ? tmp + 1 : pp->file + len - 30;
+               }
+               ret = e_snprintf(file, 32, "@%s", tmp);
                if (ret <= 0)
                        goto error;
        }
@@ -1046,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 
        return buf;
 error:
-       pr_debug("Failed to synthesize perf probe point: %s",
+       pr_debug("Failed to synthesize perf probe point: %s\n",
                 strerror(-ret));
        if (buf)
                free(buf);
@@ -1787,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group,
 
        ret = e_snprintf(buf, 128, "%s:%s", group, event);
        if (ret < 0) {
-               pr_err("Failed to copy event.");
+               pr_err("Failed to copy event.\n");
                return ret;
        }
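
For reference, two worked inputs for the rewritten parse_line_range_desc() above, following its SRC[:SLN[+NUM|-ELN]] / FNC[:SLN[+NUM|-ELN]] grammar (hypothetical arguments): "kernel/sched.c:100+5" contains a '.', so it sets lr->file = "kernel/sched.c", lr->start = 100 and, via the lr->end-- adjustment, the inclusive lr->end = 104; "schedule:10-20" has no '.', so it sets lr->function = "schedule" with lr->start = 10 and lr->end = 20. With no ':' at all, lr->start stays 0 and lr->end stays INT_MAX.
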
 
index 3991d73d1cff9164cb23fb4aae3169af97abfe77..ab83b6ac5d657c80af1e67790c1ace15d7592578 100644 (file)
@@ -117,28 +117,6 @@ static void line_list__free(struct list_head *head)
 }
 
 /* Dwarf FL wrappers */
-
-static int __linux_kernel_find_elf(Dwfl_Module *mod,
-                                  void **userdata,
-                                  const char *module_name,
-                                  Dwarf_Addr base,
-                                  char **file_name, Elf **elfp)
-{
-       int fd;
-       const char *path = kernel_get_module_path(module_name);
-
-       if (path) {
-               fd = open(path, O_RDONLY);
-               if (fd >= 0) {
-                       *file_name = strdup(path);
-                       return fd;
-               }
-       }
-       /* If failed, try to call standard method */
-       return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
-                                         file_name, elfp);
-}
-
 static char *debuginfo_path;   /* Currently dummy */
 
 static const Dwfl_Callbacks offline_callbacks = {
@@ -151,14 +129,6 @@ static const Dwfl_Callbacks offline_callbacks = {
        .find_elf = dwfl_build_id_find_elf,
 };
 
-static const Dwfl_Callbacks kernel_callbacks = {
-       .find_debuginfo = dwfl_standard_find_debuginfo,
-       .debuginfo_path = &debuginfo_path,
-
-       .find_elf = __linux_kernel_find_elf,
-       .section_address = dwfl_linux_kernel_module_section_address,
-};
-
 /* Get a Dwarf from offline image */
 static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
 {
@@ -185,6 +155,38 @@ error:
        return dbg;
 }
 
+#if _ELFUTILS_PREREQ(0, 148)
+/* This method is buggy if elfutils is older than 0.148 */
+static int __linux_kernel_find_elf(Dwfl_Module *mod,
+                                  void **userdata,
+                                  const char *module_name,
+                                  Dwarf_Addr base,
+                                  char **file_name, Elf **elfp)
+{
+       int fd;
+       const char *path = kernel_get_module_path(module_name);
+
+       pr_debug2("Use file %s for %s\n", path, module_name);
+       if (path) {
+               fd = open(path, O_RDONLY);
+               if (fd >= 0) {
+                       *file_name = strdup(path);
+                       return fd;
+               }
+       }
+       /* If failed, try to call standard method */
+       return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
+                                         file_name, elfp);
+}
+
+static const Dwfl_Callbacks kernel_callbacks = {
+       .find_debuginfo = dwfl_standard_find_debuginfo,
+       .debuginfo_path = &debuginfo_path,
+
+       .find_elf = __linux_kernel_find_elf,
+       .section_address = dwfl_linux_kernel_module_section_address,
+};
+
 /* Get a Dwarf from live kernel image */
 static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
                                          Dwarf_Addr *bias)
@@ -205,11 +207,34 @@ static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
        dbg = dwfl_addrdwarf(*dwflp, addr, bias);
        /* Here, check whether we could get a real dwarf */
        if (!dbg) {
+               pr_debug("Failed to find kernel dwarf at %lx\n",
+                        (unsigned long)addr);
                dwfl_end(*dwflp);
                *dwflp = NULL;
        }
        return dbg;
 }
+#else
+/* With older elfutils, this just supports kernel module... */
+static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp,
+                                         Dwarf_Addr *bias)
+{
+       int fd;
+       const char *path = kernel_get_module_path("kernel");
+
+       if (!path) {
+               pr_err("Failed to find vmlinux path\n");
+               return NULL;
+       }
+
+       pr_debug2("Use file %s for debuginfo\n", path);
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return NULL;
+
+       return dwfl_init_offline_dwarf(fd, dwflp, bias);
+}
+#endif
 
 /* Dwarf wrappers */
 
@@ -627,8 +652,8 @@ static_var:
        regs = get_arch_regstr(regn);
        if (!regs) {
                /* This should be a bug in DWARF or this tool */
-               pr_warning("Mapping for DWARF register number %u "
-                          "missing on this architecture.", regn);
+               pr_warning("Mapping for the register number %u "
+                          "missing on this architecture.\n", regn);
                return -ERANGE;
        }
 
@@ -674,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (ret != DW_TAG_pointer_type &&
                    ret != DW_TAG_array_type) {
                        pr_warning("Failed to cast into string: "
-                                  "%s(%s) is not a pointer nor array.",
+                                  "%s(%s) is not a pointer nor array.\n",
                                   dwarf_diename(vr_die), dwarf_diename(&type));
                        return -EINVAL;
                }
                if (ret == DW_TAG_pointer_type) {
                        if (die_get_real_type(&type, &type) == NULL) {
-                               pr_warning("Failed to get a type information.");
+                               pr_warning("Failed to get a type"
+                                          " information.\n");
                                return -ENOENT;
                        }
                        while (*ref_ptr)
@@ -695,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (!die_compare_name(&type, "char") &&
                    !die_compare_name(&type, "unsigned char")) {
                        pr_warning("Failed to cast into string: "
-                                  "%s is not (unsigned) char *.",
+                                  "%s is not (unsigned) char *.\n",
                                   dwarf_diename(vr_die));
                        return -EINVAL;
                }
@@ -805,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                        return -EINVAL;
                }
                if (field->name[0] == '[') {
-                       pr_err("Semantic error: %s is not a pointor nor array.",
-                              varname);
+                       pr_err("Semantic error: %s is not a pointor"
+                              " nor array.\n", varname);
                        return -EINVAL;
                }
                if (field->ref) {
@@ -953,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        name = dwarf_diename(sp_die);
        if (name) {
                if (dwarf_entrypc(sp_die, &eaddr) != 0) {
-                       pr_warning("Failed to get entry pc of %s\n",
+                       pr_warning("Failed to get entry address of %s\n",
                                   dwarf_diename(sp_die));
                        return -ENOENT;
                }
@@ -969,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        if (retprobe) {
                if (eaddr != paddr) {
                        pr_warning("Return probe must be on the head of"
-                                  " a real function\n");
+                                  " a real function.\n");
                        return -EINVAL;
                }
                tp->retprobe = true;
@@ -1008,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
                Dwarf_Frame *frame;
                if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
                    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
-                       pr_warning("Failed to get CFA on 0x%jx\n",
+                       pr_warning("Failed to get call frame on 0x%jx\n",
                                   (uintmax_t)pf->addr);
                        return -ENOENT;
                }
@@ -1035,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
        int ret = 0;
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1137,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
        }
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1195,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
        else {
                /* Get probe address */
                if (dwarf_entrypc(in_die, &addr) != 0) {
-                       pr_warning("Failed to get entry pc of %s.\n",
+                       pr_warning("Failed to get entry address of %s.\n",
                                   dwarf_diename(in_die));
                        param->retval = -ENOENT;
                        return DWARF_CB_ABORT;
@@ -1236,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                        param->retval = find_probe_point_lazy(sp_die, pf);
                else {
                        if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
-                               pr_warning("Failed to get entry pc of %s.\n",
-                                          dwarf_diename(sp_die));
+                               pr_warning("Failed to get entry address of "
+                                          "%s.\n", dwarf_diename(sp_die));
                                param->retval = -ENOENT;
                                return DWARF_CB_ABORT;
                        }
@@ -1279,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
@@ -1524,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        /* Open the live linux kernel */
        dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                ret = -EINVAL;
                goto end;
@@ -1534,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        addr += bias;
        /* Find cu die */
        if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
-               pr_warning("No CU DIE is found at %lx\n", addr);
+               pr_warning("Failed to find debug information for address %lx\n",
+                          addr);
                ret = -EINVAL;
                goto end;
        }
@@ -1659,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
 
        line_list__init(&lf->lr->line_list);
        if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1784,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
index bba69d4556999e5081b018857acd230de6740eea..beaefc3c1223df16423cbd084f700b6ee5a5fbf2 100644 (file)
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
                                  bool externs);
 
 #include <dwarf.h>
-#include <libdw.h>
-#include <libdwfl.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
 
 struct probe_finder {
        struct perf_probe_event *pev;           /* Target probe event */
index b059dc50cc2db9021b75435e9aac132174c6dbec..93680818e244ca8a2e58f49e59af0ca729eeb50e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
                return -1;
        }
 
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g perl\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g perl\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index 33a632523743deff80cb9e3636bd9144b84b2f76..c6d99334bdfa836c1adba2d613b658fadf20797b 100644 (file)
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
                fprintf(stderr, "couldn't open %s\n", fname);
                return -1;
        }
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g python\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g python\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index fa9d652c2dc3c07182028d4a196293333be75d78..6fb4694d05fa1e2c89e5ac91207c025214031f38 100644 (file)
@@ -65,9 +65,49 @@ out_close:
        return -1;
 }
 
+static void perf_session__id_header_size(struct perf_session *session)
+{
+       struct sample_data *data;
+       u64 sample_type = session->sample_type;
+       u16 size = 0;
+
+       if (!session->sample_id_all)
+               goto out;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid) * 2;
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu) * 2;
+out:
+       session->id_hdr_size = size;
+}
+
+void perf_session__set_sample_id_all(struct perf_session *session, bool value)
+{
+       session->sample_id_all = value;
+       perf_session__id_header_size(session);
+}
+
+void perf_session__set_sample_type(struct perf_session *session, u64 type)
+{
+       session->sample_type = type;
+}
+
 void perf_session__update_sample_type(struct perf_session *self)
 {
        self->sample_type = perf_header__sample_type(&self->header);
+       self->sample_id_all = perf_header__sample_id_all(&self->header);
+       perf_session__id_header_size(self);
 }
 
 int perf_session__create_kernel_maps(struct perf_session *self)
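
As a standalone illustration of the size computation added above: given a sample_type mask, it adds up the width of each optional ID field the kernel appends to non-sample events when sample_id_all is set. The PERF_SAMPLE_* bit values below match the perf_event ABI of this period; everything else in the sketch is hypothetical.

    #include <stdio.h>
    #include <stdint.h>

    /* Bit values as in the perf_event ABI of this era. */
    #define PERF_SAMPLE_TID        (1U << 1)
    #define PERF_SAMPLE_TIME       (1U << 2)
    #define PERF_SAMPLE_ID         (1U << 6)
    #define PERF_SAMPLE_CPU        (1U << 7)
    #define PERF_SAMPLE_STREAM_ID  (1U << 9)

    /* Mirror of the logic above: each selected field widens the trailer. */
    static uint16_t id_header_size(uint64_t sample_type)
    {
        uint16_t size = 0;

        if (sample_type & PERF_SAMPLE_TID)
            size += 2 * sizeof(uint32_t);   /* pid + tid */
        if (sample_type & PERF_SAMPLE_TIME)
            size += sizeof(uint64_t);
        if (sample_type & PERF_SAMPLE_ID)
            size += sizeof(uint64_t);
        if (sample_type & PERF_SAMPLE_STREAM_ID)
            size += sizeof(uint64_t);
        if (sample_type & PERF_SAMPLE_CPU)
            size += 2 * sizeof(uint32_t);   /* cpu + reserved padding */
        return size;
    }

    int main(void)
    {
        uint64_t mask = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU;

        /* 8 (pid/tid) + 8 (time) + 8 (cpu/res) = 24 bytes per event */
        printf("id header size: %u bytes\n", id_header_size(mask));
        return 0;
    }
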
@@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
        machines__destroy_guest_kernel_maps(&self->machines);
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops)
 {
        size_t len = filename ? strlen(filename) + 1 : 0;
        struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        INIT_LIST_HEAD(&self->dead_threads);
        self->hists_tree = RB_ROOT;
        self->last_match = NULL;
-       self->mmap_window = 32;
+       /*
+        * On 64-bit we can mmap the whole data file in one go, so there is
+        * no need for small mmap slices. On 32-bit we use a 32MB window.
+        */
+#if BITS_PER_LONG == 64
+       self->mmap_window = ULLONG_MAX;
+#else
+       self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
        self->machines = RB_ROOT;
        self->repipe = repipe;
-       INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+       INIT_LIST_HEAD(&self->ordered_samples.samples);
+       INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+       INIT_LIST_HEAD(&self->ordered_samples.to_free);
        machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
        if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        }
 
        perf_session__update_sample_type(self);
+
+       if (ops && ops->ordering_requires_timestamps &&
+           ops->ordered_samples && !self->sample_id_all) {
+               dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+               ops->ordered_samples = false;
+       }
+
 out:
        return self;
 out_free:
@@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
        return syms;
 }
 
+static int process_event_synth_stub(event_t *event __used,
+                                   struct perf_session *session __used)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
 static int process_event_stub(event_t *event __used,
+                             struct sample_data *sample __used,
                              struct perf_session *session __used)
 {
        dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->exit == NULL)
                handler->exit = process_event_stub;
        if (handler->lost == NULL)
-               handler->lost = process_event_stub;
+               handler->lost = event__process_lost;
        if (handler->read == NULL)
                handler->read = process_event_stub;
        if (handler->throttle == NULL)
@@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->unthrottle == NULL)
                handler->unthrottle = process_event_stub;
        if (handler->attr == NULL)
-               handler->attr = process_event_stub;
+               handler->attr = process_event_synth_stub;
        if (handler->event_type == NULL)
-               handler->event_type = process_event_stub;
+               handler->event_type = process_event_synth_stub;
        if (handler->tracing_data == NULL)
-               handler->tracing_data = process_event_stub;
+               handler->tracing_data = process_event_synth_stub;
        if (handler->build_id == NULL)
-               handler->build_id = process_event_stub;
+               handler->build_id = process_event_synth_stub;
        if (handler->finished_round == NULL) {
                if (handler->ordered_samples)
                        handler->finished_round = process_finished_round;
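
The stub swap above follows a common ops-table pattern: fill every NULL slot with a default handler once, so the dispatcher never has to test pointers. A minimal sketch of the pattern, with hypothetical names:

    #include <stdio.h>

    typedef int (*event_op)(int event);

    struct event_ops {
        event_op sample;
        event_op mmap;
    };

    static int stub(int event)
    {
        printf("event %d: unhandled\n", event);
        return 0;
    }

    /* Fill every NULL slot so dispatch never checks for NULL. */
    static void fill_defaults(struct event_ops *ops)
    {
        if (ops->sample == NULL)
            ops->sample = stub;
        if (ops->mmap == NULL)
            ops->mmap = stub;
    }

    int main(void)
    {
        struct event_ops ops = { .sample = NULL, .mmap = NULL };

        fill_defaults(&ops);
        return ops.sample(1) || ops.mmap(2);
    }
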
@@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = {
 
 struct sample_queue {
        u64                     timestamp;
-       struct sample_event     *event;
+       u64                     file_offset;
+       event_t                 *event;
        struct list_head        list;
 };
 
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+       struct ordered_samples *os = &session->ordered_samples;
+
+       while (!list_empty(&os->to_free)) {
+               struct sample_queue *sq;
+
+               sq = list_entry(os->to_free.next, struct sample_queue, list);
+               list_del(&sq->list);
+               free(sq);
+       }
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset);
+
 static void flush_sample_queue(struct perf_session *s,
                               struct perf_event_ops *ops)
 {
-       struct list_head *head = &s->ordered_samples.samples_head;
-       u64 limit = s->ordered_samples.next_flush;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *head = &os->samples;
        struct sample_queue *tmp, *iter;
+       struct sample_data sample;
+       u64 limit = os->next_flush;
+       u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
 
        if (!ops->ordered_samples || !limit)
                return;
 
        list_for_each_entry_safe(iter, tmp, head, list) {
                if (iter->timestamp > limit)
-                       return;
+                       break;
 
-               if (iter == s->ordered_samples.last_inserted)
-                       s->ordered_samples.last_inserted = NULL;
+               event__parse_sample(iter->event, s, &sample);
+               perf_session_deliver_event(s, iter->event, &sample, ops,
+                                          iter->file_offset);
 
-               ops->sample((event_t *)iter->event, s);
-
-               s->ordered_samples.last_flush = iter->timestamp;
+               os->last_flush = iter->timestamp;
                list_del(&iter->list);
-               free(iter->event);
-               free(iter);
+               list_add(&iter->list, &os->sample_cache);
+       }
+
+       if (list_empty(head)) {
+               os->last_sample = NULL;
+       } else if (last_ts <= limit) {
+               os->last_sample =
+                       list_entry(head->prev, struct sample_queue, list);
        }
 }
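
flush_sample_queue() now delivers everything with a timestamp up to next_flush and parks the finished nodes on sample_cache instead of freeing them. A simplified sketch of that flush-and-recycle idea, using a plain singly linked list rather than the kernel's list_head:

    #include <stdio.h>
    #include <stdint.h>

    struct node {
        uint64_t ts;
        struct node *next;
    };

    /* Deliver all entries with ts <= limit; recycle them onto *cache. */
    static void flush_queue(struct node **queue, struct node **cache,
                            uint64_t limit)
    {
        while (*queue && (*queue)->ts <= limit) {
            struct node *n = *queue;

            *queue = n->next;
            printf("deliver ts=%llu\n", (unsigned long long)n->ts);
            n->next = *cache;       /* keep the node for later reuse */
            *cache = n;
        }
    }

    int main(void)
    {
        struct node c = { 30, NULL }, b = { 20, &c }, a = { 10, &b };
        struct node *queue = &a, *cache = NULL;

        flush_queue(&queue, &cache, 25);  /* delivers 10 and 20, keeps 30 */
        return queue != &c;
    }
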
 
@@ -465,178 +560,265 @@ static int process_finished_round(event_t *event __used,
        return 0;
 }
 
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
-       struct sample_queue *iter;
-
-       list_for_each_entry_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
-                                 struct sample_queue *iter,
-                                 struct list_head *head)
-{
-       list_for_each_entry_continue_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
-                                struct sample_queue *iter,
-                                struct list_head *head)
-{
-       list_for_each_entry_continue(iter, head, list) {
-               if (iter->timestamp > new->timestamp) {
-                       list_add_tail(&new->list, &iter->list);
-                       return;
-               }
-       }
-       list_add_tail(&new->list, head);
-}
-
 /* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
-                                struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
 {
-       struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
-       struct list_head *head = &s->ordered_samples.samples_head;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct sample_queue *sample = os->last_sample;
+       u64 timestamp = new->timestamp;
+       struct list_head *p;
 
+       os->last_sample = new;
 
-       if (!last_inserted) {
-               __queue_sample_end(new, head);
+       if (!sample) {
+               list_add(&new->list, &os->samples);
+               os->max_timestamp = timestamp;
                return;
        }
 
        /*
-        * Most of the time the current event has a timestamp
-        * very close to the last event inserted, unless we just switched
-        * to another event buffer. Having a sorting based on a list and
-        * on the last inserted event that is close to the current one is
-        * probably more efficient than an rbtree based sorting.
+        * last_sample points at the most recently queued event, which may
+        * sit anywhere in the list. New events usually carry timestamps
+        * close to it, so start the search from there.
         */
-       if (last_inserted->timestamp >= new->timestamp)
-               __queue_sample_before(new, last_inserted, head);
-       else
-               __queue_sample_after(new, last_inserted, head);
+       if (sample->timestamp <= timestamp) {
+               while (sample->timestamp <= timestamp) {
+                       p = sample->list.next;
+                       if (p == &os->samples) {
+                               list_add_tail(&new->list, &os->samples);
+                               os->max_timestamp = timestamp;
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add_tail(&new->list, &sample->list);
+       } else {
+               while (sample->timestamp > timestamp) {
+                       p = sample->list.prev;
+                       if (p == &os->samples) {
+                               list_add(&new->list, &os->samples);
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add(&new->list, &sample->list);
+       }
 }
 
-static int queue_sample_event(event_t *event, struct sample_data *data,
-                             struct perf_session *s)
+#define MAX_SAMPLE_BUFFER      (64 * 1024 / sizeof(struct sample_queue))
+
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+                                   struct sample_data *data, u64 file_offset)
 {
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *sc = &os->sample_cache;
        u64 timestamp = data->time;
        struct sample_queue *new;
 
+       if (!timestamp || timestamp == ~0ULL)
+               return -ETIME;
 
        if (timestamp < s->ordered_samples.last_flush) {
                printf("Warning: Timestamp below last timeslice flush\n");
                return -EINVAL;
        }
 
-       new = malloc(sizeof(*new));
-       if (!new)
-               return -ENOMEM;
+       if (!list_empty(sc)) {
+               new = list_entry(sc->next, struct sample_queue, list);
+               list_del(&new->list);
+       } else if (os->sample_buffer) {
+               new = os->sample_buffer + os->sample_buffer_idx;
+               if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+                       os->sample_buffer = NULL;
+       } else {
+               os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+               if (!os->sample_buffer)
+                       return -ENOMEM;
+               list_add(&os->sample_buffer->list, &os->to_free);
+               os->sample_buffer_idx = 2;
+               new = os->sample_buffer + 1;
+       }
 
        new->timestamp = timestamp;
+       new->file_offset = file_offset;
+       new->event = event;
 
-       new->event = malloc(event->header.size);
-       if (!new->event) {
-               free(new);
-               return -ENOMEM;
-       }
+       __queue_event(new, s);
 
-       memcpy(new->event, event, event->header.size);
+       return 0;
+}
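
perf_session_queue_event() amortizes allocation: each malloc grabs MAX_SAMPLE_BUFFER entries, entries are handed out by index, and slot 0 of every chunk is reserved to link the chunk onto to_free for teardown, which is why sample_buffer_idx starts at 2 with slot 1 returned first. A minimal sketch of that chunk allocator, with hypothetical names and sizes:

    #include <stdio.h>
    #include <stdlib.h>

    #define CHUNK_ENTRIES 1024

    struct entry {
        struct entry *next_chunk;   /* used only in slot 0 of a chunk */
        long payload;
    };

    struct pool {
        struct entry *chunk;        /* current chunk, NULL when used up */
        int idx;                    /* next free slot in current chunk */
        struct entry *chunks;       /* all chunks, linked through slot 0 */
    };

    static struct entry *pool_get(struct pool *p)
    {
        struct entry *e;

        if (p->chunk) {
            e = p->chunk + p->idx;
            if (++p->idx == CHUNK_ENTRIES)
                p->chunk = NULL;    /* chunk exhausted */
            return e;
        }
        p->chunk = malloc(CHUNK_ENTRIES * sizeof(*e));
        if (!p->chunk)
            return NULL;
        p->chunk[0].next_chunk = p->chunks;   /* slot 0 is bookkeeping, */
        p->chunks = p->chunk;
        p->idx = 2;                           /* so slot 1 goes out first */
        return p->chunk + 1;
    }

    static void pool_free_all(struct pool *p)
    {
        while (p->chunks) {
            struct entry *c = p->chunks;

            p->chunks = c->next_chunk;
            free(c);                /* one free() per chunk, not per entry */
        }
    }

    int main(void)
    {
        struct pool p = { NULL, 0, NULL };
        int i;

        for (i = 0; i < 3000; i++)  /* spans several chunks */
            if (!pool_get(&p))
                return 1;
        pool_free_all(&p);
        return 0;
    }
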
 
-       __queue_sample_event(new, s);
-       s->ordered_samples.last_inserted = new;
+static void callchain__printf(struct sample_data *sample)
+{
+       unsigned int i;
 
-       if (new->timestamp > s->ordered_samples.max_timestamp)
-               s->ordered_samples.max_timestamp = new->timestamp;
+       printf("... chain: nr:%Lu\n", sample->callchain->nr);
 
-       return 0;
+       for (i = 0; i < sample->callchain->nr; i++)
+               printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
 }
 
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
-                                       struct perf_event_ops *ops)
+static void perf_session__print_tstamp(struct perf_session *session,
+                                      event_t *event,
+                                      struct sample_data *sample)
 {
-       struct sample_data data;
+       if (event->header.type != PERF_RECORD_SAMPLE &&
+           !session->sample_id_all) {
+               fputs("-1 -1 ", stdout);
+               return;
+       }
 
-       if (!ops->ordered_samples)
-               return ops->sample(event, s);
+       if ((session->sample_type & PERF_SAMPLE_CPU))
+               printf("%u ", sample->cpu);
 
-       bzero(&data, sizeof(struct sample_data));
-       event__parse_sample(event, s->sample_type, &data);
+       if (session->sample_type & PERF_SAMPLE_TIME)
+               printf("%Lu ", sample->time);
+}
 
-       queue_sample_event(event, &data, s);
+static void dump_event(struct perf_session *session, event_t *event,
+                      u64 file_offset, struct sample_data *sample)
+{
+       if (!dump_trace)
+               return;
 
-       return 0;
+       printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
+              event->header.type);
+
+       trace_event(event);
+
+       if (sample)
+               perf_session__print_tstamp(session, event, sample);
+
+       printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
+              event__get_event_name(event->header.type));
 }
 
-static int perf_session__process_event(struct perf_session *self,
-                                      event_t *event,
-                                      struct perf_event_ops *ops,
-                                      u64 offset, u64 head)
+static void dump_sample(struct perf_session *session, event_t *event,
+                       struct sample_data *sample)
 {
-       trace_event(event);
+       if (!dump_trace)
+               return;
 
-       if (event->header.type < PERF_RECORD_HEADER_MAX) {
-               dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
-                           offset + head, event->header.size,
-                           event__name[event->header.type]);
-               hists__inc_nr_events(&self->hists, event->header.type);
-       }
+       printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+              sample->pid, sample->tid, sample->ip, sample->period);
 
-       if (self->header.needs_swap && event__swap_ops[event->header.type])
-               event__swap_ops[event->header.type](event);
+       if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
+               callchain__printf(sample);
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset)
+{
+       dump_event(session, event, file_offset, sample);
 
        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
-               return perf_session__process_sample(event, self, ops);
+               dump_sample(session, event, sample);
+               return ops->sample(event, sample, session);
        case PERF_RECORD_MMAP:
-               return ops->mmap(event, self);
+               return ops->mmap(event, sample, session);
        case PERF_RECORD_COMM:
-               return ops->comm(event, self);
+               return ops->comm(event, sample, session);
        case PERF_RECORD_FORK:
-               return ops->fork(event, self);
+               return ops->fork(event, sample, session);
        case PERF_RECORD_EXIT:
-               return ops->exit(event, self);
+               return ops->exit(event, sample, session);
        case PERF_RECORD_LOST:
-               return ops->lost(event, self);
+               return ops->lost(event, sample, session);
        case PERF_RECORD_READ:
-               return ops->read(event, self);
+               return ops->read(event, sample, session);
        case PERF_RECORD_THROTTLE:
-               return ops->throttle(event, self);
+               return ops->throttle(event, sample, session);
        case PERF_RECORD_UNTHROTTLE:
-               return ops->unthrottle(event, self);
+               return ops->unthrottle(event, sample, session);
+       default:
+               ++session->hists.stats.nr_unknown_events;
+               return -1;
+       }
+}
+
+static int perf_session__preprocess_sample(struct perf_session *session,
+                                          event_t *event, struct sample_data *sample)
+{
+       if (event->header.type != PERF_RECORD_SAMPLE ||
+           !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+               return 0;
+
+       if (!ip_callchain__valid(sample->callchain, event)) {
+               pr_debug("call-chain problem with event, skipping it.\n");
+               ++session->hists.stats.nr_invalid_chains;
+               session->hists.stats.total_invalid_chains += sample->period;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+                                           struct perf_event_ops *ops, u64 file_offset)
+{
+       dump_event(session, event, file_offset, NULL);
+
+       /* These events are processed right away */
+       switch (event->header.type) {
        case PERF_RECORD_HEADER_ATTR:
-               return ops->attr(event, self);
+               return ops->attr(event, session);
        case PERF_RECORD_HEADER_EVENT_TYPE:
-               return ops->event_type(event, self);
+               return ops->event_type(event, session);
        case PERF_RECORD_HEADER_TRACING_DATA:
                /* setup for reading amidst mmap */
-               lseek(self->fd, offset + head, SEEK_SET);
-               return ops->tracing_data(event, self);
+               lseek(session->fd, file_offset, SEEK_SET);
+               return ops->tracing_data(event, session);
        case PERF_RECORD_HEADER_BUILD_ID:
-               return ops->build_id(event, self);
+               return ops->build_id(event, session);
        case PERF_RECORD_FINISHED_ROUND:
-               return ops->finished_round(event, self, ops);
+               return ops->finished_round(event, session, ops);
        default:
-               ++self->hists.stats.nr_unknown_events;
-               return -1;
+               return -EINVAL;
        }
 }
 
+static int perf_session__process_event(struct perf_session *session,
+                                      event_t *event,
+                                      struct perf_event_ops *ops,
+                                      u64 file_offset)
+{
+       struct sample_data sample;
+       int ret;
+
+       if (session->header.needs_swap && event__swap_ops[event->header.type])
+               event__swap_ops[event->header.type](event);
+
+       if (event->header.type >= PERF_RECORD_HEADER_MAX)
+               return -EINVAL;
+
+       hists__inc_nr_events(&session->hists, event->header.type);
+
+       if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+               return perf_session__process_user_event(session, event, ops, file_offset);
+
+       /*
+        * For all kernel events we get the sample data
+        */
+       event__parse_sample(event, session, &sample);
+
+       /* Preprocess sample records - precheck callchains */
+       if (perf_session__preprocess_sample(session, event, &sample))
+               return 0;
+
+       if (ops->ordered_samples) {
+               ret = perf_session_queue_event(session, event, &sample,
+                                              file_offset);
+               if (ret != -ETIME)
+                       return ret;
+       }
+
+       return perf_session_deliver_event(session, event, &sample, ops,
+                                         file_offset);
+}
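
perf_session__process_event() now splits into three paths: user-level/synthetic events are handled immediately, ordered kernel events are queued, and events without a usable timestamp fall back to direct delivery via the -ETIME return. A condensed, hypothetical sketch of that control flow:

    #include <errno.h>
    #include <stdio.h>
    #include <stdint.h>

    enum { EV_SAMPLE, EV_MMAP, EV_USER_START, EV_ATTR, EV_MAX };

    static int deliver(int type, uint64_t ts)
    {
        printf("deliver type=%d ts=%llu\n", type, (unsigned long long)ts);
        return 0;
    }

    static int queue_event(uint64_t ts)
    {
        if (ts == 0 || ts == ~0ULL)
            return -ETIME;          /* no usable timestamp: cannot order */
        printf("queued ts=%llu\n", (unsigned long long)ts);
        return 0;
    }

    static int process_event(int type, uint64_t ts, int ordered)
    {
        if (type >= EV_MAX)
            return -EINVAL;
        if (type >= EV_USER_START)  /* user/synthetic: handle right away */
            return deliver(type, 0);

        if (ordered) {
            int ret = queue_event(ts);

            if (ret != -ETIME)
                return ret;         /* queued, or a hard error */
        }
        return deliver(type, ts);   /* unordered or timestamp-less path */
    }

    int main(void)
    {
        process_event(EV_ATTR, 0, 1);    /* immediate */
        process_event(EV_SAMPLE, 42, 1); /* queued */
        process_event(EV_SAMPLE, 0, 1);  /* no timestamp: direct delivery */
        return 0;
    }
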
+
 void perf_event_header__bswap(struct perf_event_header *self)
 {
        self->type = bswap_32(self->type);
@@ -656,21 +838,33 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
        return thread;
 }
 
-int do_read(int fd, void *buf, size_t size)
+static void perf_session__warn_about_errors(const struct perf_session *session,
+                                           const struct perf_event_ops *ops)
 {
-       void *buf_start = buf;
-
-       while (size) {
-               int ret = read(fd, buf, size);
-
-               if (ret <= 0)
-                       return ret;
+       if (ops->lost == event__process_lost &&
+           session->hists.stats.total_lost != 0) {
+               ui__warning("Processed %Lu events and LOST %Lu!\n\n"
+                           "Check IO/CPU overload!\n\n",
+                           session->hists.stats.total_period,
+                           session->hists.stats.total_lost);
+       }
 
-               size -= ret;
-               buf += ret;
+       if (session->hists.stats.nr_unknown_events != 0) {
+               ui__warning("Found %u unknown events!\n\n"
+                           "Is this an older tool processing a perf.data "
+                           "file generated by a more recent tool?\n\n"
+                           "If that is not the case, consider "
+                           "reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_unknown_events);
        }
 
-       return buf - buf_start;
+       if (session->hists.stats.nr_invalid_chains != 0) {
+               ui__warning("Found invalid callchains!\n\n"
+                           "%u out of %u events were discarded for this reason.\n\n"
+                           "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_invalid_chains,
+                           session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
+       }
 }
 
 #define session_done() (*(volatile int *)(&session_done))
@@ -690,7 +884,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 
        head = 0;
 more:
-       err = do_read(self->fd, &event, sizeof(struct perf_event_header));
+       err = readn(self->fd, &event, sizeof(struct perf_event_header));
        if (err <= 0) {
                if (err == 0)
                        goto done;
@@ -710,8 +904,7 @@ more:
        p += sizeof(struct perf_event_header);
 
        if (size - sizeof(struct perf_event_header)) {
-               err = do_read(self->fd, p,
-                             size - sizeof(struct perf_event_header));
+               err = readn(self->fd, p, size - sizeof(struct perf_event_header));
                if (err <= 0) {
                        if (err == 0) {
                                pr_err("unexpected end of event stream\n");
@@ -724,8 +917,7 @@ more:
        }
 
        if (size == 0 ||
-           (skip = perf_session__process_event(self, &event, ops,
-                                               0, head)) < 0) {
+           (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
                            head, event.header.size, event.header.type);
                /*
@@ -740,9 +932,6 @@ more:
 
        head += size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   head, event.header.size, event.header.type);
-
        if (skip > 0)
                head += skip;
 
@@ -751,82 +940,91 @@ more:
 done:
        err = 0;
 out_err:
+       perf_session__warn_about_errors(self, ops);
+       perf_session_free_sample_buffers(self);
        return err;
 }
 
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
                                   u64 data_offset, u64 data_size,
                                   u64 file_size, struct perf_event_ops *ops)
 {
-       int err, mmap_prot, mmap_flags;
-       u64 head, shift;
-       u64 offset = 0;
-       size_t  page_size;
+       u64 head, page_offset, file_offset, file_pos, progress_next;
+       int err, mmap_prot, mmap_flags, map_idx = 0;
+       struct ui_progress *progress;
+       size_t  page_size, mmap_size;
+       char *buf, *mmaps[8];
        event_t *event;
        uint32_t size;
-       char *buf;
-       struct ui_progress *progress = ui_progress__new("Processing events...",
-                                                       self->size);
-       if (progress == NULL)
-               return -1;
 
        perf_event_ops__fill_defaults(ops);
 
        page_size = sysconf(_SC_PAGESIZE);
 
-       head = data_offset;
-       shift = page_size * (head / page_size);
-       offset += shift;
-       head -= shift;
+       page_offset = page_size * (data_offset / page_size);
+       file_offset = page_offset;
+       head = data_offset - page_offset;
+
+       if (data_offset + data_size < file_size)
+               file_size = data_offset + data_size;
+
+       progress_next = file_size / 16;
+       progress = ui_progress__new("Processing events...", file_size);
+       if (progress == NULL)
+               return -1;
+
+       mmap_size = session->mmap_window;
+       if (mmap_size > file_size)
+               mmap_size = file_size;
+
+       memset(mmaps, 0, sizeof(mmaps));
 
        mmap_prot  = PROT_READ;
        mmap_flags = MAP_SHARED;
 
-       if (self->header.needs_swap) {
+       if (session->header.needs_swap) {
                mmap_prot  |= PROT_WRITE;
                mmap_flags = MAP_PRIVATE;
        }
 remap:
-       buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
-                  mmap_flags, self->fd, offset);
+       buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+                  file_offset);
        if (buf == MAP_FAILED) {
                pr_err("failed to mmap file\n");
                err = -errno;
                goto out_err;
        }
+       mmaps[map_idx] = buf;
+       map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+       file_pos = file_offset + head;
 
 more:
        event = (event_t *)(buf + head);
-       ui_progress__update(progress, offset);
 
-       if (self->header.needs_swap)
+       if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);
        size = event->header.size;
        if (size == 0)
                size = 8;
 
-       if (head + event->header.size >= page_size * self->mmap_window) {
-               int munmap_ret;
-
-               shift = page_size * (head / page_size);
-
-               munmap_ret = munmap(buf, page_size * self->mmap_window);
-               assert(munmap_ret == 0);
+       if (head + event->header.size >= mmap_size) {
+               if (mmaps[map_idx]) {
+                       munmap(mmaps[map_idx], mmap_size);
+                       mmaps[map_idx] = NULL;
+               }
 
-               offset += shift;
-               head -= shift;
+               page_offset = page_size * (head / page_size);
+               file_offset += page_offset;
+               head -= page_offset;
                goto remap;
        }
 
        size = event->header.size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   offset + head, event->header.size, event->header.type);
-
        if (size == 0 ||
-           perf_session__process_event(self, event, ops, offset, head) < 0) {
+           perf_session__process_event(session, event, ops, file_pos) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
-                           offset + head, event->header.size,
+                           file_offset + head, event->header.size,
                            event->header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1037,24 @@ more:
        }
 
        head += size;
+       file_pos += size;
 
-       if (offset + head >= data_offset + data_size)
-               goto done;
+       if (file_pos >= progress_next) {
+               progress_next += file_size / 16;
+               ui_progress__update(progress, file_pos);
+       }
 
-       if (offset + head < file_size)
+       if (file_pos < file_size)
                goto more;
-done:
+
        err = 0;
        /* do the final flush for ordered samples */
-       self->ordered_samples.next_flush = ULLONG_MAX;
-       flush_sample_queue(self, ops);
+       session->ordered_samples.next_flush = ULLONG_MAX;
+       flush_sample_queue(session, ops);
 out_err:
        ui_progress__delete(progress);
+       perf_session__warn_about_errors(session, ops);
+       perf_session_free_sample_buffers(session);
        return err;
 }
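
The rewritten loop above reads the file through a sliding mmap window, remapping at a page-aligned offset whenever a record would cross the window end. A minimal standalone reader built on the same idea, assuming fixed 16-byte records and reading its own binary by default; all names are hypothetical:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <sys/stat.h>

    #define REC_SIZE 16

    int main(int argc, char **argv)
    {
        const char *path = argc > 1 ? argv[1] : argv[0];
        size_t page = sysconf(_SC_PAGESIZE);
        size_t win = 4 * page;          /* small window to force remaps */
        struct stat st;
        uint64_t off = 0, head = 0;
        unsigned long nrecs = 0;
        char *buf;
        int fd = open(path, O_RDONLY);

        if (fd < 0 || fstat(fd, &st) < 0)
            return 1;

    remap:
        buf = mmap(NULL, win, PROT_READ, MAP_SHARED, fd, off);
        if (buf == MAP_FAILED)
            return 1;
        while (off + head + REC_SIZE <= (uint64_t)st.st_size) {
            if (head + REC_SIZE > win) {
                uint64_t shift = page * (head / page);

                munmap(buf, win);
                off += shift;           /* slide window, page aligned */
                head -= shift;
                goto remap;
            }
            /* a real reader would decode the record at buf + head here */
            nrecs++;
            head += REC_SIZE;
        }
        munmap(buf, win);
        close(fd);
        printf("%lu records of %d bytes\n", nrecs, REC_SIZE);
        return 0;
    }
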
 
index 9fa0fc2a863f1259caf8caf7494f867d0737a297..decd83f274fd3642fecf039740e75a015431df4d 100644 (file)
@@ -17,8 +17,12 @@ struct ordered_samples {
        u64                     last_flush;
        u64                     next_flush;
        u64                     max_timestamp;
-       struct list_head        samples_head;
-       struct sample_queue     *last_inserted;
+       struct list_head        samples;
+       struct list_head        sample_cache;
+       struct list_head        to_free;
+       struct sample_queue     *sample_buffer;
+       struct sample_queue     *last_sample;
+       int                     sample_buffer_idx;
 };
 
 struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
        int                     fd;
        bool                    fd_pipe;
        bool                    repipe;
+       bool                    sample_id_all;
+       u16                     id_hdr_size;
        int                     cwdlen;
        char                    *cwd;
        struct ordered_samples  ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
 
 struct perf_event_ops;
 
-typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op)(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
 typedef int (*event_op2)(event_t *self, struct perf_session *session,
                         struct perf_event_ops *ops);
 
@@ -63,16 +71,19 @@ struct perf_event_ops {
                        lost,
                        read,
                        throttle,
-                       unthrottle,
-                       attr,
+                       unthrottle;
+       event_synth_op  attr,
                        event_type,
                        tracing_data,
                        build_id;
        event_op2       finished_round;
        bool            ordered_samples;
+       bool            ordering_requires_timestamps;
 };
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops);
 void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
@@ -98,8 +109,9 @@ void mem_bswap_64(void *src, int byte_size);
 
 int perf_session__create_kernel_maps(struct perf_session *self);
 
-int do_read(int fd, void *buf, size_t size);
 void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_id_all(struct perf_session *session, bool value);
+void perf_session__set_sample_type(struct perf_session *session, u64 type);
 void perf_session__remove_thread(struct perf_session *self, struct thread *th);
 
 static inline
index b62a553cc67d969c104638b9bf33922f351f06bb..f44fa541d56e67c6bb6c976e78123e99657ffbee 100644 (file)
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
                return repsep_snprintf(bf, size, "%-*s", width, dso_name);
        }
 
-       return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+       return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
 /* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
 
        if (verbose) {
                char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
-               ret += repsep_snprintf(bf, size, "%*Lx %c ",
+               ret += repsep_snprintf(bf, size, "%-#*llx %c ",
                                       BITS_PER_LONG / 4, self->ip, o);
        }
 
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
                ret += repsep_snprintf(bf + ret, size - ret, "%s",
                                       self->ms.sym->name);
        else
-               ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+               ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
                                       BITS_PER_LONG / 4, self->ip);
 
        return ret;
index 0409fc7c0058809f01da646c4d5ee98c21f69b1d..8fc0bd3a3a4a6bd3f816a6619cc1de0793a98caa 100644 (file)
@@ -259,7 +259,7 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space)
                if (!*pat)      /* Tail wild card matches all */
                        return true;
                while (*str)
-                       if (strglobmatch(str++, pat))
+                       if (__match_glob(str++, pat, ignore_space))
                                return true;
        }
        return !*str && !*pat;
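
The fix above matters because the '*' case must recurse into __match_glob() itself so that ignore_space survives the recursion; recursing through the public strglobmatch() wrapper silently dropped it. A minimal matcher showing the recursion shape, supporting only '*' and '?':

    #include <stdio.h>
    #include <stdbool.h>

    static bool match_glob(const char *str, const char *pat)
    {
        while (*str && *pat && *pat != '*') {
            if (*pat != '?' && *str != *pat)
                return false;
            str++, pat++;
        }
        if (*pat == '*') {
            while (*pat == '*')
                pat++;
            if (!*pat)                      /* trailing '*' matches the rest */
                return true;
            while (*str)
                if (match_glob(str++, pat)) /* recurse into the same matcher */
                    return true;
        }
        return !*str && !*pat;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               match_glob("sys_read", "sys_*"),     /* 1 */
               match_glob("sys_read", "*_write"),   /* 0 */
               match_glob("sys_read", "sys_?ead")); /* 1 */
        return 0;
    }
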
index d628c8d1cf5ec11111ab3b750da0d14f6d8fff7f..15ccfba8cdf805111d56b1e7f3bf71431ef2c1e4 100644 (file)
 #include <limits.h>
 #include <sys/utsname.h>
 
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 128
+#endif
+
 #ifndef NT_GNU_BUILD_ID
 #define NT_GNU_BUILD_ID 3
 #endif
@@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = {
        .exclude_other    = true,
        .use_modules      = true,
        .try_vmlinux_path = true,
+       .symfs            = "",
 };
 
 int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self)
                prev = curr;
                curr = rb_entry(nd, struct symbol, rb_node);
 
-               if (prev->end == prev->start)
+               if (prev->end == prev->start && prev->end != curr->start)
                        prev->end = curr->start - 1;
        }
 
@@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
         * We don't have the actual symbols yet, so guess the
         * final address of the last map.
         */
-       curr->end = ~0UL;
+       curr->end = ~0ULL;
 }
 
 static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                                    char type, u64 start))
+                                         char type, u64 start, u64 end))
 {
        char *line = NULL;
        size_t n;
-       int err = 0;
+       int err = -1;
+       u64 prev_start = 0;
+       char prev_symbol_type = 0;
+       char *prev_symbol_name;
        FILE *file = fopen(filename, "r");
 
        if (file == NULL)
                goto out_failure;
 
+       prev_symbol_name = malloc(KSYM_NAME_LEN);
+       if (prev_symbol_name == NULL)
+               goto out_close;
+
+       err = 0;
+
        while (!feof(file)) {
                u64 start;
                int line_len, len;
@@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg,
                        continue;
 
                symbol_type = toupper(line[len]);
-               symbol_name = line + len + 2;
+               len += 2;
+               symbol_name = line + len;
+               len = line_len - len;
 
-               err = process_symbol(arg, symbol_name, symbol_type, start);
-               if (err)
+               if (len >= KSYM_NAME_LEN) {
+                       err = -1;
                        break;
+               }
+
+               if (prev_symbol_type) {
+                       u64 end = start;
+                       if (end != prev_start)
+                               --end;
+                       err = process_symbol(arg, prev_symbol_name,
+                                            prev_symbol_type, prev_start, end);
+                       if (err)
+                               break;
+               }
+
+               memcpy(prev_symbol_name, symbol_name, len + 1);
+               prev_symbol_type = symbol_type;
+               prev_start = start;
        }
 
+       free(prev_symbol_name);
        free(line);
+out_close:
        fclose(file);
        return err;
 
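kallsyms only records start addresses, so the parser above buffers one symbol and closes it when the next start address arrives, backing off by one byte unless the two symbols share an address. A sketch of that lookahead on canned, hypothetical data:

    #include <stdio.h>
    #include <stdint.h>

    struct ksym { uint64_t start; const char *name; };

    int main(void)
    {
        /* as if parsed, in address order, from /proc/kallsyms */
        struct ksym syms[] = {
            { 0x1000, "sys_read" },
            { 0x1080, "sys_write" },
            { 0x1080, "sys_write_alias" },  /* same start as its successor's */
            { 0x1200, "sys_open" },
        };
        int i, n = sizeof(syms) / sizeof(syms[0]);

        for (i = 0; i + 1 < n; i++) {
            uint64_t end = syms[i + 1].start;

            if (end != syms[i].start)       /* never produce end < start */
                end--;
            printf("%#llx-%#llx %s\n",
                   (unsigned long long)syms[i].start,
                   (unsigned long long)end, syms[i].name);
        }
        /* the last symbol has no successor; a real parser must guess */
        return 0;
    }
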
@@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type)
 }
 
 static int map__process_kallsym_symbol(void *arg, const char *name,
-                                      char type, u64 start)
+                                      char type, u64 start, u64 end)
 {
        struct symbol *sym;
        struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
        if (!symbol_type__is_a(type, a->map->type))
                return 0;
 
-       /*
-        * Will fix up the end later, when we have all symbols sorted.
-        */
-       sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
-
+       sym = symbol__new(start, end - start + 1,
+                         kallsyms2elf_type(type), name);
        if (sym == NULL)
                return -ENOMEM;
        /*
@@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
        if (dso__load_all_kallsyms(self, filename, map) < 0)
                return -1;
 
-       symbols__fixup_end(&self->symbols[map->type]);
        if (self->kernel == DSO_TYPE_GUEST_KERNEL)
                self->origin = DSO__ORIG_GUEST_KERNEL;
        else
@@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
        char sympltname[1024];
        Elf *elf;
        int nr = 0, symidx, fd, err = 0;
+       char name[PATH_MAX];
 
-       fd = open(self->long_name, O_RDONLY);
+       snprintf(name, sizeof(name), "%s%s",
+                symbol_conf.symfs, self->long_name);
+       fd = open(name, O_RDONLY);
        if (fd < 0)
                goto out;
 
@@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
             self->origin++) {
                switch (self->origin) {
                case DSO__ORIG_BUILD_ID_CACHE:
-                       if (dso__build_id_filename(self, name, size) == NULL)
+                       /* skip the locally configured cache if a symfs is given */
+                       if (symbol_conf.symfs[0] ||
+                           (dso__build_id_filename(self, name, size) == NULL)) {
                                continue;
+                       }
                        break;
                case DSO__ORIG_FEDORA:
-                       snprintf(name, size, "/usr/lib/debug%s.debug",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s.debug",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_UBUNTU:
-                       snprintf(name, size, "/usr/lib/debug%s",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_BUILDID: {
                        char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
                                          sizeof(self->build_id),
                                          build_id_hex);
                        snprintf(name, size,
-                                "/usr/lib/debug/.build-id/%.2s/%s.debug",
-                                build_id_hex, build_id_hex + 2);
+                                "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
+                                symbol_conf.symfs, build_id_hex, build_id_hex + 2);
                        }
                        break;
                case DSO__ORIG_DSO:
-                       snprintf(name, size, "%s", self->long_name);
+                       snprintf(name, size, "%s%s",
+                            symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_GUEST_KMODULE:
                        if (map->groups && map->groups->machine)
                                root_dir = map->groups->machine->root_dir;
                        else
                                root_dir = "";
-                       snprintf(name, size, "%s%s", root_dir, self->long_name);
+                       snprintf(name, size, "%s%s%s", symbol_conf.symfs,
+                                root_dir, self->long_name);
+                       break;
+
+               case DSO__ORIG_KMODULE:
+                       snprintf(name, size, "%s%s", symbol_conf.symfs,
+                                self->long_name);
                        break;
 
                default:
@@ -1780,21 +1822,24 @@ out_failure:
        return -1;
 }
 
-static int dso__load_vmlinux(struct dso *self, struct map *map,
-                            const char *vmlinux, symbol_filter_t filter)
+int dso__load_vmlinux(struct dso *self, struct map *map,
+                     const char *vmlinux, symbol_filter_t filter)
 {
        int err = -1, fd;
+       char symfs_vmlinux[PATH_MAX];
 
-       fd = open(vmlinux, O_RDONLY);
+       snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
+                symbol_conf.symfs, vmlinux);
+       fd = open(symfs_vmlinux, O_RDONLY);
        if (fd < 0)
                return -1;
 
        dso__set_loaded(self, map->type);
-       err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0);
+       err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
        close(fd);
 
        if (err > 0)
-               pr_debug("Using %s for symbols\n", vmlinux);
+               pr_debug("Using %s for symbols\n", symfs_vmlinux);
 
        return err;
 }
@@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
        const char *kallsyms_filename = NULL;
        char *kallsyms_allocated_filename = NULL;
        /*
-        * Step 1: if the user specified a vmlinux filename, use it and only
-        * it, reporting errors to the user if it cannot be used.
+        * Step 1: if the user specified a kallsyms or vmlinux filename, use
+        * it and only it, reporting errors to the user if it cannot be used.
         *
         * For instance, try to analyse an ARM perf.data file _without_ a
         * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
         * validation in dso__load_vmlinux and will bail out if they don't
         * match.
         */
+       if (symbol_conf.kallsyms_name != NULL) {
+               kallsyms_filename = symbol_conf.kallsyms_name;
+               goto do_kallsyms;
+       }
+
        if (symbol_conf.vmlinux_name != NULL) {
                err = dso__load_vmlinux(self, map,
                                        symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
                        goto out_fixup;
        }
 
+       /* do not try local files if a symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return -1;
+
        /*
         * Say the kernel DSO was created when processing the build-id header table,
         * we have a build-id, so check if it is the same as the running kernel,
@@ -2136,7 +2190,7 @@ struct process_args {
 };
 
 static int symbol__in_kernel(void *arg, const char *name,
-                            char type __used, u64 start)
+                            char type __used, u64 start, u64 end __used)
 {
        struct process_args *args = arg;
 
@@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void)
        struct utsname uts;
        char bf[PATH_MAX];
 
-       if (uname(&uts) < 0)
-               return -1;
-
        vmlinux_path = malloc(sizeof(char *) * 5);
        if (vmlinux_path == NULL)
                return -1;
@@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void)
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
                goto out_fail;
        ++vmlinux_path__nr_entries;
+
+       /* only try running kernel version if no symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return 0;
+
+       if (uname(&uts) < 0)
+               return -1;
+
        snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
        vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
@@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str,
 
 int symbol__init(void)
 {
+       const char *symfs;
+
        if (symbol_conf.initialized)
                return 0;
 
@@ -2359,6 +2420,18 @@ int symbol__init(void)
                       symbol_conf.sym_list_str, "symbol") < 0)
                goto out_free_comm_list;
 
+       /*
+        * A symfs path of "/" is identical to "";
+        * reset it here for simplicity.
+        */
+       symfs = realpath(symbol_conf.symfs, NULL);
+       if (symfs == NULL)
+               symfs = symbol_conf.symfs;
+       if (strcmp(symfs, "/") == 0)
+               symbol_conf.symfs = "";
+       if (symfs != symbol_conf.symfs)
+               free((void *)symfs);
+
        symbol_conf.initialized = true;
        return 0;
 
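The normalization above leans on realpath(path, NULL) (POSIX.1-2008), which returns a malloc'd canonical path, so "/", "//" and friends all collapse to the empty prefix. A small sketch of the same normalization; the helper name is hypothetical:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Canonicalize a root prefix; "/" (however spelled) collapses to "". */
    static const char *normalize_symfs(const char *symfs)
    {
        char *real = realpath(symfs, NULL); /* POSIX.1-2008: malloc'd result */
        const char *result = symfs;

        if (real != NULL) {
            if (strcmp(real, "/") == 0)
                result = "";
            free(real);
        }
        return result;
    }

    int main(void)
    {
        printf("'%s'\n", normalize_symfs("//"));   /* '' */
        printf("'%s'\n", normalize_symfs("/tmp")); /* '/tmp' */
        return 0;
    }
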
index 038f2201ee09579ca3f460d9f59576770ea477d2..670cd1c88f54dc932d18b7c79d609d61e7560295 100644 (file)
@@ -72,6 +72,7 @@ struct symbol_conf {
                        show_cpu_utilization,
                        initialized;
        const char      *vmlinux_name,
+                       *kallsyms_name,
                        *source_prefix,
                        *field_sep;
        const char      *default_guest_vmlinux_name,
@@ -85,6 +86,7 @@ struct symbol_conf {
        struct strlist  *dso_list,
                        *comm_list,
                        *sym_list;
+       const char      *symfs;
 };
 
 extern struct symbol_conf symbol_conf;
@@ -166,6 +168,8 @@ void dso__sort_by_name(struct dso *self, enum map_type type);
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
+int dso__load_vmlinux(struct dso *self, struct map *map,
+                     const char *vmlinux, symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *self, struct map *map,
                           symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
@@ -213,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                         char type, u64 start));
+                                         char type, u64 start, u64 end));
 
 void machine__destroy_kernel_maps(struct machine *self);
 int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
index 8c72d888e449989dcdf1c349fdc4a97ead6b694a..00f4eade2e3e9c1fdd2e9c92c83222a10c1555fd 100644 (file)
@@ -16,35 +16,50 @@ static int filter(const struct dirent *dir)
                return 1;
 }
 
-int find_all_tid(int pid, pid_t ** all_tid)
+struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
+       struct thread_map *threads;
        char name[256];
        int items;
        struct dirent **namelist = NULL;
-       int ret = 0;
        int i;
 
        sprintf(name, "/proc/%d/task", pid);
        items = scandir(name, &namelist, filter, NULL);
        if (items <= 0)
-                return -ENOENT;
-       *all_tid = malloc(sizeof(pid_t) * items);
-       if (!*all_tid) {
-               ret = -ENOMEM;
-               goto failure;
-       }
-
-       for (i = 0; i < items; i++)
-               (*all_tid)[i] = atoi(namelist[i]->d_name);
+                return NULL;
 
-       ret = items;
+       threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+       if (threads != NULL) {
+               for (i = 0; i < items; i++)
+                       threads->map[i] = atoi(namelist[i]->d_name);
+               threads->nr = items;
+       }
 
-failure:
        for (i = 0; i < items; i++)
                free(namelist[i]);
        free(namelist);
 
-       return ret;
+       return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+       struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+
+       if (threads != NULL) {
+               threads->map[0] = tid;
+               threads->nr     = 1;
+       }
+
+       return threads;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+       if (pid != -1)
+               return thread_map__new_by_pid(pid);
+       return thread_map__new_by_tid(tid);
 }
 
 static struct thread *thread__new(pid_t pid)
index 688500ff826f008a4c48cd5bf782ad60c1363325..d7574101054a8ae9b6ded40fd0a813c3cacf9965 100644 (file)
@@ -18,11 +18,24 @@ struct thread {
        int                     comm_len;
 };
 
+struct thread_map {
+       int nr;
+       int map[];
+};
+
 struct perf_session;
 
 void thread__delete(struct thread *self);
 
-int find_all_tid(int pid, pid_t ** all_tid);
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+
+static inline void thread_map__delete(struct thread_map *threads)
+{
+       free(threads);
+}
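
struct thread_map uses a C99 flexible array member, so the header and all entries live in one allocation and a single free() releases both, as thread_map__delete() shows. A generic sketch of the pattern, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>

    struct int_map {
        int nr;
        int map[];          /* C99 flexible array member */
    };

    static struct int_map *int_map__new(const int *vals, int nr)
    {
        /* header plus nr trailing ints in one allocation */
        struct int_map *m = malloc(sizeof(*m) + nr * sizeof(int));
        int i;

        if (m != NULL) {
            for (i = 0; i < nr; i++)
                m->map[i] = vals[i];
            m->nr = nr;
        }
        return m;
    }

    int main(void)
    {
        int tids[] = { 101, 102, 103 };
        struct int_map *m = int_map__new(tids, 3);

        if (!m)
            return 1;
        printf("nr=%d first=%d\n", m->nr, m->map[0]);
        free(m);            /* one free releases header and elements */
        return 0;
    }
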
+
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
index b1572601286cad7020c323d175177d0445eb806f..35729f4c40cb7a98e891a2013d4b6e812a24237e 100644 (file)
 #include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <linux/list.h>
 #include <linux/kernel.h>
 
 #include "../perf.h"
 #include "trace-event.h"
 #include "debugfs.h"
+#include "evsel.h"
 
 #define VERSION "0.5"
 
@@ -469,16 +471,17 @@ out:
 }
 
 static struct tracepoint_path *
-get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
+get_tracepoints_path(struct list_head *pattrs)
 {
        struct tracepoint_path path, *ppath = &path;
-       int i, nr_tracepoints = 0;
+       struct perf_evsel *pos;
+       int nr_tracepoints = 0;
 
-       for (i = 0; i < nb_events; i++) {
-               if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
+       list_for_each_entry(pos, pattrs, node) {
+               if (pos->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;
                ++nr_tracepoints;
-               ppath->next = tracepoint_id_to_path(pattrs[i].config);
+               ppath->next = tracepoint_id_to_path(pos->attr.config);
                if (!ppath->next)
                        die("%s\n", "No memory to alloc tracepoints list");
                ppath = ppath->next;
@@ -487,21 +490,21 @@ get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
        return nr_tracepoints > 0 ? path.next : NULL;
 }
 
-bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events)
+bool have_tracepoints(struct list_head *pattrs)
 {
-       int i;
+       struct perf_evsel *pos;
 
-       for (i = 0; i < nb_events; i++)
-               if (pattrs[i].type == PERF_TYPE_TRACEPOINT)
+       list_for_each_entry(pos, pattrs, node)
+               if (pos->attr.type == PERF_TYPE_TRACEPOINT)
                        return true;
 
        return false;
 }
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
+int read_tracing_data(int fd, struct list_head *pattrs)
 {
        char buf[BUFSIZ];
-       struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
+       struct tracepoint_path *tps = get_tracepoints_path(pattrs);
 
        /*
         * What? No tracepoints? No sense writing anything here, bail out.
@@ -545,14 +548,13 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
        return 0;
 }
 
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-                              int nb_events)
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs)
 {
        ssize_t size;
        int err = 0;
 
        calc_data_size = 1;
-       err = read_tracing_data(fd, pattrs, nb_events);
+       err = read_tracing_data(fd, pattrs);
        size = calc_data_size - 1;
        calc_data_size = 0;
 
index b3e86b1e44443909e518573ab0a1d49f8167f151..b5f12ca24d99f877877b701ae6f8b0d7f6b3acca 100644 (file)
@@ -262,9 +262,8 @@ raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
 unsigned long long eval_flag(const char *flag);
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-                              int nb_events);
+int read_tracing_data(int fd, struct list_head *pattrs);
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs);
 
 /* taken from kernel/trace/trace.h */
 enum trace_flag_type {
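With both prototypes converted, callers now hand over the evsel list instead of an attr array plus a count. A sketch of a converted call site, with assumed names (evsel_list, write_trace_metadata) modelled on how perf record threads its event list through; die() is the error helper already used in trace-event-info.c:

static void write_trace_metadata(int output_fd, struct list_head *evsel_list)
{
        if (!have_tracepoints(evsel_list))
                return;         /* no tracepoint events, nothing to write */

        /* Most failure paths inside die() already; check defensively. */
        if (read_tracing_data(output_fd, evsel_list) < 0)
                die("failed to write tracing data");
}
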
index 056c69521a38098a8053d18f3e615d3a35390fd4..7b5a8926624e49be67d802fc91afc56451bead23 100644 (file)
@@ -104,10 +104,24 @@ out_destroy_form:
        return rc;
 }
 
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+                 warning_str[] = "Warning!", ok[] = "Ok";
 
 bool ui__dialog_yesno(const char *msg)
 {
        /* newtWinChoice should really be accepting const char pointers... */
        return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
 }
+
+void ui__warning(const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       if (use_browser > 0)
+               newtWinMessagev((char *)warning_str, (char *)ok,
+                               (char *)format, args);
+       else
+               vfprintf(stderr, format, args);
+       va_end(args);
+}
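ui__warning() routes through newt only when the TUI is active (use_browser > 0) and otherwise degrades to plain stderr output, so call sites need not care which frontend is running. A hypothetical call site ("path" is a stand-in variable):

#include <errno.h>
#include <string.h>

void ui__warning(const char *format, ...);      /* as defined above */

static void warn_open_failure(const char *path)
{
        ui__warning("failed to open %s: %s\n", path, strerror(errno));
}
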
index 214265674ddda0a59488cc0871a0943b20fc6bd4..5b3ea49aa63ea1ff35b7d949b49a661187aa16e6 100644 (file)
@@ -114,3 +114,20 @@ unsigned long convert_unit(unsigned long value, char *unit)
 
        return value;
 }
+
+int readn(int fd, void *buf, size_t n)
+{
+       void *buf_start = buf;
+
+       while (n) {
+               int ret = read(fd, buf, n);
+
+               if (ret <= 0)
+                       return ret;
+
+               n -= ret;
+               buf += ret;
+       }
+
+       return buf - buf_start;
+}
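readn() loops until all n bytes arrive, so short reads from pipes are transparent to callers. Note that, as written, a read() returning 0 (EOF) partway through makes readn() return 0 and the partial count is lost, so callers should compare the result against the size they requested, as in this sketch (struct data_header is a hypothetical on-disk type; the readn prototype is the one added to the header in the hunk that follows):

#include <stdint.h>
#include <stddef.h>

int readn(int fd, void *buf, size_t n);         /* prototype added below */

/* Hypothetical fixed-size on-disk header. */
struct data_header {
        uint64_t magic;
        uint64_t size;
};

static int read_header(int fd, struct data_header *hdr)
{
        /* Anything short of a full header (EOF, error) is a failure. */
        return readn(fd, hdr, sizeof(*hdr)) == (int)sizeof(*hdr) ? 0 : -1;
}
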
index 7562707ddd1c491755dc8ea5121637918ba1b844..e833f26f3bfc7d702cd58ba55c7ae85657fe3801 100644 (file)
@@ -265,6 +265,7 @@ void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
 unsigned long convert_unit(unsigned long value, char *unit);
+int readn(int fd, void *buf, size_t size);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
new file mode 100644 (file)
index 0000000..22afbf6
--- /dev/null
+++ b/tools/perf/util/xyarray.c
@@ -0,0 +1,20 @@
+#include "xyarray.h"
+#include "util.h"
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+       size_t row_size = ylen * entry_size;
+       struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+       if (xy != NULL) {
+               xy->entry_size = entry_size;
+               xy->row_size   = row_size;
+       }
+
+       return xy;
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+       free(xy);
+}
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
new file mode 100644 (file)
index 0000000..c488a07
--- /dev/null
+++ b/tools/perf/util/xyarray.h
@@ -0,0 +1,20 @@
+#ifndef _PERF_XYARRAY_H_
+#define _PERF_XYARRAY_H_ 1
+
+#include <sys/types.h>
+
+struct xyarray {
+       size_t row_size;
+       size_t entry_size;
+       char contents[];
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+       return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+#endif /* _PERF_XYARRAY_H_ */
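
A self-contained sketch of the xyarray contract: xlen rows of ylen entries, entry_size bytes each, addressed through the inline accessor (x selects the row, y the entry within it). The cpus-by-threads shape below is only an assumed example of a typical use; nothing in this patch fixes the axes' meaning:

#include <stdio.h>
#include "xyarray.h"

int main(void)
{
        const int nr_cpus = 4, nr_threads = 2;
        struct xyarray *fds = xyarray__new(nr_cpus, nr_threads, sizeof(int));
        int cpu, thread;

        if (fds == NULL)
                return 1;

        /* Initialize every (cpu, thread) slot to an invalid fd. */
        for (cpu = 0; cpu < nr_cpus; cpu++)
                for (thread = 0; thread < nr_threads; thread++)
                        *(int *)xyarray__entry(fds, cpu, thread) = -1;

        printf("entry(3, 1) = %d\n", *(int *)xyarray__entry(fds, 3, 1));
        xyarray__delete(fds);
        return 0;
}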